{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Off-Policy VPG to Play CartPole-v0\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import losses\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:29:13 [INFO] env: <CartPoleEnv<CartPole-v0>>\n",
      "11:29:13 [INFO] action_space: Discrete(2)\n",
      "11:29:13 [INFO] observation_space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n",
      "11:29:13 [INFO] reward_range: (-inf, inf)\n",
      "11:29:13 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "11:29:13 [INFO] _max_episode_steps: 200\n",
      "11:29:13 [INFO] _elapsed_steps: None\n",
      "11:29:13 [INFO] id: CartPole-v0\n",
      "11:29:13 [INFO] entry_point: gym.envs.classic_control:CartPoleEnv\n",
      "11:29:13 [INFO] reward_threshold: 195.0\n",
      "11:29:13 [INFO] nondeterministic: False\n",
      "11:29:13 [INFO] max_episode_steps: 200\n",
      "11:29:13 [INFO] _kwargs: {}\n",
      "11:29:13 [INFO] _env_name: CartPole\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('CartPole-v0')\n",
    "env.seed(0)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class OffPolicyVPGAgent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "\n",
    "        def dot(y_true, y_pred):\n",
    "            return -tf.reduce_sum(y_true * y_pred, axis=-1)\n",
    "\n",
    "        self.policy_net = self.build_net(hidden_sizes=[],\n",
    "                output_size=self.action_n,\n",
    "                output_activation=nn.softmax,\n",
    "                loss=dot, learning_rate=0.06)\n",
    "\n",
    "    def build_net(self, hidden_sizes, output_size,\n",
    "            activation=nn.relu, output_activation=None,\n",
    "            use_bias=False, loss=losses.mse, learning_rate=0.001):\n",
    "        model = keras.Sequential()\n",
    "        for hidden_size in hidden_sizes:\n",
    "            model.add(layers.Dense(units=hidden_size,\n",
    "                    activation=activation, use_bias=use_bias))\n",
    "        model.add(layers.Dense(units=output_size,\n",
    "                activation=output_activation, use_bias=use_bias))\n",
    "        optimizer = optimizers.Adam(learning_rate)\n",
    "        model.compile(optimizer=optimizer, loss=loss)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        if self.mode == 'train':\n",
    "            action = np.random.choice(self.action_n) # use random policy\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        else:\n",
    "            probs = self.policy_net.predict(observation[np.newaxis])[0]\n",
    "            action = np.random.choice(self.action_n, p=probs)\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        if self.mode == 'train':\n",
    "            self.learn()\n",
    "\n",
    "    def learn(self):\n",
    "        df = pd.DataFrame(np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['state', 'reward', 'done', 'action'])\n",
    "        df['discount'] = self.gamma ** df.index.to_series()\n",
    "        df['discounted_reward'] = df['discount'] * df['reward'].astype(float)\n",
    "        df['discounted_return'] = df['discounted_reward'][::-1].cumsum()\n",
    "        states = np.stack(df['state'])\n",
    "        actions = np.eye(self.action_n)[df['action'].astype(int)]\n",
    "        df['behavior_prob'] = 1. / self.action_n\n",
    "        df['sample_weight'] = df['discounted_return'] / df['behavior_prob']\n",
    "        sample_weight = df[['sample_weight',]].values\n",
    "        self.policy_net.fit(states, actions, sample_weight=sample_weight, verbose=0)\n",
    "\n",
    "\n",
    "agent = OffPolicyVPGAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:29:14 [INFO] ==== train ====\n",
      "11:29:15 [INFO] NumExpr defaulting to 4 threads.\n",
      "11:29:20 [DEBUG] train episode 0: reward = 34.00, steps = 34\n",
      "11:29:21 [DEBUG] train episode 1: reward = 16.00, steps = 16\n",
      "11:29:22 [DEBUG] train episode 2: reward = 20.00, steps = 20\n",
      "11:29:23 [DEBUG] train episode 3: reward = 17.00, steps = 17\n",
      "11:29:25 [DEBUG] train episode 4: reward = 26.00, steps = 26\n",
      "11:29:25 [DEBUG] train episode 5: reward = 12.00, steps = 12\n",
      "11:29:27 [DEBUG] train episode 6: reward = 25.00, steps = 25\n",
      "11:29:28 [DEBUG] train episode 7: reward = 19.00, steps = 19\n",
      "11:29:30 [DEBUG] train episode 8: reward = 37.00, steps = 37\n",
      "11:29:31 [DEBUG] train episode 9: reward = 27.00, steps = 27\n",
      "11:29:32 [DEBUG] train episode 10: reward = 16.00, steps = 16\n",
      "11:29:33 [DEBUG] train episode 11: reward = 23.00, steps = 23\n",
      "11:29:34 [DEBUG] train episode 12: reward = 12.00, steps = 12\n",
      "11:29:35 [DEBUG] train episode 13: reward = 24.00, steps = 24\n",
      "11:29:36 [DEBUG] train episode 14: reward = 10.00, steps = 10\n",
      "11:29:37 [DEBUG] train episode 15: reward = 25.00, steps = 25\n",
      "11:29:38 [DEBUG] train episode 16: reward = 25.00, steps = 25\n",
      "11:29:40 [DEBUG] train episode 17: reward = 34.00, steps = 34\n",
      "11:29:41 [DEBUG] train episode 18: reward = 13.00, steps = 13\n",
      "11:29:42 [DEBUG] train episode 19: reward = 17.00, steps = 17\n",
      "11:29:43 [DEBUG] train episode 20: reward = 11.00, steps = 11\n",
      "11:29:44 [DEBUG] train episode 21: reward = 19.00, steps = 19\n",
      "11:29:48 [DEBUG] train episode 22: reward = 75.00, steps = 75\n",
      "11:29:49 [DEBUG] train episode 23: reward = 20.00, steps = 20\n",
      "11:29:52 [DEBUG] train episode 24: reward = 55.00, steps = 55\n",
      "11:29:53 [DEBUG] train episode 25: reward = 19.00, steps = 19\n",
      "11:29:56 [DEBUG] train episode 26: reward = 70.00, steps = 70\n",
      "11:29:56 [DEBUG] train episode 27: reward = 11.00, steps = 11\n",
      "11:29:58 [DEBUG] train episode 28: reward = 24.00, steps = 24\n",
      "11:30:02 [DEBUG] train episode 29: reward = 81.00, steps = 81\n",
      "11:30:05 [DEBUG] train episode 30: reward = 68.00, steps = 68\n",
      "11:30:10 [DEBUG] train episode 31: reward = 111.00, steps = 111\n",
      "11:30:12 [DEBUG] train episode 32: reward = 44.00, steps = 44\n",
      "11:30:14 [DEBUG] train episode 33: reward = 42.00, steps = 42\n",
      "11:30:15 [DEBUG] train episode 34: reward = 24.00, steps = 24\n",
      "11:30:18 [DEBUG] train episode 35: reward = 65.00, steps = 65\n",
      "11:30:20 [DEBUG] train episode 36: reward = 46.00, steps = 46\n",
      "11:30:23 [DEBUG] train episode 37: reward = 59.00, steps = 59\n",
      "11:30:27 [DEBUG] train episode 38: reward = 96.00, steps = 96\n",
      "11:30:30 [DEBUG] train episode 39: reward = 61.00, steps = 61\n",
      "11:30:33 [DEBUG] train episode 40: reward = 49.00, steps = 49\n",
      "11:30:37 [DEBUG] train episode 41: reward = 105.00, steps = 105\n",
      "11:30:41 [DEBUG] train episode 42: reward = 83.00, steps = 83\n",
      "11:30:46 [DEBUG] train episode 43: reward = 102.00, steps = 102\n",
      "11:30:48 [DEBUG] train episode 44: reward = 41.00, steps = 41\n",
      "11:30:51 [DEBUG] train episode 45: reward = 58.00, steps = 58\n",
      "11:30:53 [DEBUG] train episode 46: reward = 55.00, steps = 55\n",
      "11:30:56 [DEBUG] train episode 47: reward = 51.00, steps = 51\n",
      "11:30:58 [DEBUG] train episode 48: reward = 55.00, steps = 55\n",
      "11:31:03 [DEBUG] train episode 49: reward = 102.00, steps = 102\n",
      "11:31:06 [DEBUG] train episode 50: reward = 58.00, steps = 58\n",
      "11:31:09 [DEBUG] train episode 51: reward = 81.00, steps = 81\n",
      "11:31:11 [DEBUG] train episode 52: reward = 24.00, steps = 24\n",
      "11:31:13 [DEBUG] train episode 53: reward = 50.00, steps = 50\n",
      "11:31:16 [DEBUG] train episode 54: reward = 76.00, steps = 76\n",
      "11:31:19 [DEBUG] train episode 55: reward = 62.00, steps = 62\n",
      "11:31:22 [DEBUG] train episode 56: reward = 51.00, steps = 51\n",
      "11:31:26 [DEBUG] train episode 57: reward = 88.00, steps = 88\n",
      "11:31:28 [DEBUG] train episode 58: reward = 40.00, steps = 40\n",
      "11:31:31 [DEBUG] train episode 59: reward = 71.00, steps = 71\n",
      "11:31:34 [DEBUG] train episode 60: reward = 70.00, steps = 70\n",
      "11:31:36 [DEBUG] train episode 61: reward = 39.00, steps = 39\n",
      "11:31:38 [DEBUG] train episode 62: reward = 51.00, steps = 51\n",
      "11:31:41 [DEBUG] train episode 63: reward = 56.00, steps = 56\n",
      "11:31:44 [DEBUG] train episode 64: reward = 54.00, steps = 54\n",
      "11:31:47 [DEBUG] train episode 65: reward = 70.00, steps = 70\n",
      "11:31:50 [DEBUG] train episode 66: reward = 68.00, steps = 68\n",
      "11:31:53 [DEBUG] train episode 67: reward = 61.00, steps = 61\n",
      "11:31:55 [DEBUG] train episode 68: reward = 43.00, steps = 43\n",
      "11:31:57 [DEBUG] train episode 69: reward = 51.00, steps = 51\n",
      "11:32:00 [DEBUG] train episode 70: reward = 58.00, steps = 58\n",
      "11:32:03 [DEBUG] train episode 71: reward = 64.00, steps = 64\n",
      "11:32:05 [DEBUG] train episode 72: reward = 38.00, steps = 38\n",
      "11:32:07 [DEBUG] train episode 73: reward = 34.00, steps = 34\n",
      "11:32:11 [DEBUG] train episode 74: reward = 88.00, steps = 88\n",
      "11:32:16 [DEBUG] train episode 75: reward = 125.00, steps = 125\n",
      "11:32:21 [DEBUG] train episode 76: reward = 92.00, steps = 92\n",
      "11:32:25 [DEBUG] train episode 77: reward = 103.00, steps = 103\n",
      "11:32:29 [DEBUG] train episode 78: reward = 82.00, steps = 82\n",
      "11:32:31 [DEBUG] train episode 79: reward = 37.00, steps = 37\n",
      "11:32:33 [DEBUG] train episode 80: reward = 38.00, steps = 38\n",
      "11:32:36 [DEBUG] train episode 81: reward = 61.00, steps = 61\n",
      "11:32:38 [DEBUG] train episode 82: reward = 60.00, steps = 60\n",
      "11:32:41 [DEBUG] train episode 83: reward = 63.00, steps = 63\n",
      "11:32:45 [DEBUG] train episode 84: reward = 81.00, steps = 81\n",
      "11:32:48 [DEBUG] train episode 85: reward = 54.00, steps = 54\n",
      "11:32:50 [DEBUG] train episode 86: reward = 50.00, steps = 50\n",
      "11:32:53 [DEBUG] train episode 87: reward = 72.00, steps = 72\n",
      "11:32:56 [DEBUG] train episode 88: reward = 59.00, steps = 59\n",
      "11:33:01 [DEBUG] train episode 89: reward = 96.00, steps = 96\n",
      "11:33:03 [DEBUG] train episode 90: reward = 48.00, steps = 48\n",
      "11:33:07 [DEBUG] train episode 91: reward = 83.00, steps = 83\n",
      "11:33:09 [DEBUG] train episode 92: reward = 50.00, steps = 50\n",
      "11:33:12 [DEBUG] train episode 93: reward = 54.00, steps = 54\n",
      "11:33:14 [DEBUG] train episode 94: reward = 47.00, steps = 47\n",
      "11:33:16 [DEBUG] train episode 95: reward = 45.00, steps = 45\n",
      "11:33:19 [DEBUG] train episode 96: reward = 68.00, steps = 68\n",
      "11:33:22 [DEBUG] train episode 97: reward = 66.00, steps = 66\n",
      "11:33:25 [DEBUG] train episode 98: reward = 53.00, steps = 53\n",
      "11:33:28 [DEBUG] train episode 99: reward = 58.00, steps = 58\n",
      "11:33:32 [DEBUG] train episode 100: reward = 90.00, steps = 90\n",
      "11:33:35 [DEBUG] train episode 101: reward = 74.00, steps = 74\n",
      "11:33:38 [DEBUG] train episode 102: reward = 61.00, steps = 61\n",
      "11:33:41 [DEBUG] train episode 103: reward = 50.00, steps = 50\n",
      "11:33:43 [DEBUG] train episode 104: reward = 51.00, steps = 51\n",
      "11:33:46 [DEBUG] train episode 105: reward = 58.00, steps = 58\n",
      "11:33:50 [DEBUG] train episode 106: reward = 100.00, steps = 100\n",
      "11:33:53 [DEBUG] train episode 107: reward = 49.00, steps = 49\n",
      "11:33:55 [DEBUG] train episode 108: reward = 50.00, steps = 50\n",
      "11:33:57 [DEBUG] train episode 109: reward = 32.00, steps = 32\n",
      "11:34:01 [DEBUG] train episode 110: reward = 81.00, steps = 81\n",
      "11:34:04 [DEBUG] train episode 111: reward = 75.00, steps = 75\n",
      "11:34:08 [DEBUG] train episode 112: reward = 76.00, steps = 76\n",
      "11:34:11 [DEBUG] train episode 113: reward = 71.00, steps = 71\n",
      "11:34:14 [DEBUG] train episode 114: reward = 72.00, steps = 72\n",
      "11:34:17 [DEBUG] train episode 115: reward = 63.00, steps = 63\n",
      "11:34:21 [DEBUG] train episode 116: reward = 87.00, steps = 87\n",
      "11:34:26 [DEBUG] train episode 117: reward = 104.00, steps = 104\n",
      "11:34:30 [DEBUG] train episode 118: reward = 91.00, steps = 91\n",
      "11:34:33 [DEBUG] train episode 119: reward = 53.00, steps = 53\n",
      "11:34:36 [DEBUG] train episode 120: reward = 61.00, steps = 61\n",
      "11:34:39 [DEBUG] train episode 121: reward = 61.00, steps = 61\n",
      "11:34:41 [DEBUG] train episode 122: reward = 57.00, steps = 57\n",
      "11:34:46 [DEBUG] train episode 123: reward = 97.00, steps = 97\n",
      "11:34:50 [DEBUG] train episode 124: reward = 70.00, steps = 70\n",
      "11:34:52 [DEBUG] train episode 125: reward = 41.00, steps = 41\n",
      "11:34:55 [DEBUG] train episode 126: reward = 66.00, steps = 66\n",
      "11:34:58 [DEBUG] train episode 127: reward = 74.00, steps = 74\n",
      "11:35:02 [DEBUG] train episode 128: reward = 84.00, steps = 84\n",
      "11:35:05 [DEBUG] train episode 129: reward = 59.00, steps = 59\n",
      "11:35:07 [DEBUG] train episode 130: reward = 53.00, steps = 53\n",
      "11:35:14 [DEBUG] train episode 131: reward = 145.00, steps = 145\n",
      "11:35:18 [DEBUG] train episode 132: reward = 93.00, steps = 93\n",
      "11:35:22 [DEBUG] train episode 133: reward = 85.00, steps = 85\n",
      "11:35:26 [DEBUG] train episode 134: reward = 81.00, steps = 81\n",
      "11:35:28 [DEBUG] train episode 135: reward = 40.00, steps = 40\n",
      "11:35:31 [DEBUG] train episode 136: reward = 56.00, steps = 56\n",
      "11:35:33 [DEBUG] train episode 137: reward = 59.00, steps = 59\n",
      "11:35:36 [DEBUG] train episode 138: reward = 52.00, steps = 52\n",
      "11:35:38 [DEBUG] train episode 139: reward = 39.00, steps = 39\n",
      "11:35:45 [DEBUG] train episode 140: reward = 143.00, steps = 143\n",
      "11:35:50 [DEBUG] train episode 141: reward = 104.00, steps = 104\n",
      "11:35:53 [DEBUG] train episode 142: reward = 65.00, steps = 65\n",
      "11:35:55 [DEBUG] train episode 143: reward = 50.00, steps = 50\n",
      "11:35:58 [DEBUG] train episode 144: reward = 72.00, steps = 72\n",
      "11:36:02 [DEBUG] train episode 145: reward = 67.00, steps = 67\n",
      "11:36:07 [DEBUG] train episode 146: reward = 127.00, steps = 127\n",
      "11:36:16 [DEBUG] train episode 147: reward = 188.00, steps = 188\n",
      "11:36:20 [DEBUG] train episode 148: reward = 90.00, steps = 90\n",
      "11:36:23 [DEBUG] train episode 149: reward = 66.00, steps = 66\n",
      "11:36:26 [DEBUG] train episode 150: reward = 71.00, steps = 71\n",
      "11:36:30 [DEBUG] train episode 151: reward = 71.00, steps = 71\n",
      "11:36:33 [DEBUG] train episode 152: reward = 81.00, steps = 81\n",
      "11:36:37 [DEBUG] train episode 153: reward = 72.00, steps = 72\n",
      "11:36:40 [DEBUG] train episode 154: reward = 68.00, steps = 68\n",
      "11:36:46 [DEBUG] train episode 155: reward = 126.00, steps = 126\n",
      "11:36:47 [DEBUG] train episode 156: reward = 38.00, steps = 38\n",
      "11:36:50 [DEBUG] train episode 157: reward = 60.00, steps = 60\n",
      "11:36:53 [DEBUG] train episode 158: reward = 68.00, steps = 68\n",
      "11:36:58 [DEBUG] train episode 159: reward = 89.00, steps = 89\n",
      "11:37:02 [DEBUG] train episode 160: reward = 107.00, steps = 107\n",
      "11:37:08 [DEBUG] train episode 161: reward = 122.00, steps = 122\n",
      "11:37:10 [DEBUG] train episode 162: reward = 49.00, steps = 49\n",
      "11:37:14 [DEBUG] train episode 163: reward = 71.00, steps = 71\n",
      "11:37:16 [DEBUG] train episode 164: reward = 48.00, steps = 48\n",
      "11:37:21 [DEBUG] train episode 165: reward = 125.00, steps = 125\n",
      "11:37:26 [DEBUG] train episode 166: reward = 110.00, steps = 110\n",
      "11:37:30 [DEBUG] train episode 167: reward = 85.00, steps = 85\n",
      "11:37:34 [DEBUG] train episode 168: reward = 86.00, steps = 86\n",
      "11:37:38 [DEBUG] train episode 169: reward = 75.00, steps = 75\n",
      "11:37:41 [DEBUG] train episode 170: reward = 63.00, steps = 63\n",
      "11:37:44 [DEBUG] train episode 171: reward = 74.00, steps = 74\n",
      "11:37:48 [DEBUG] train episode 172: reward = 78.00, steps = 78\n",
      "11:37:51 [DEBUG] train episode 173: reward = 64.00, steps = 64\n",
      "11:37:52 [DEBUG] train episode 174: reward = 39.00, steps = 39\n",
      "11:37:56 [DEBUG] train episode 175: reward = 69.00, steps = 69\n",
      "11:37:59 [DEBUG] train episode 176: reward = 73.00, steps = 73\n",
      "11:38:03 [DEBUG] train episode 177: reward = 80.00, steps = 80\n",
      "11:38:05 [DEBUG] train episode 178: reward = 56.00, steps = 56\n",
      "11:38:07 [DEBUG] train episode 179: reward = 43.00, steps = 43\n",
      "11:38:12 [DEBUG] train episode 180: reward = 100.00, steps = 100\n",
      "11:38:15 [DEBUG] train episode 181: reward = 67.00, steps = 67\n",
      "11:38:17 [DEBUG] train episode 182: reward = 46.00, steps = 46\n",
      "11:38:21 [DEBUG] train episode 183: reward = 77.00, steps = 77\n",
      "11:38:24 [DEBUG] train episode 184: reward = 70.00, steps = 70\n",
      "11:38:27 [DEBUG] train episode 185: reward = 58.00, steps = 58\n",
      "11:38:29 [DEBUG] train episode 186: reward = 55.00, steps = 55\n",
      "11:38:34 [DEBUG] train episode 187: reward = 96.00, steps = 96\n",
      "11:38:40 [DEBUG] train episode 188: reward = 138.00, steps = 138\n",
      "11:38:42 [DEBUG] train episode 189: reward = 52.00, steps = 52\n",
      "11:38:47 [DEBUG] train episode 190: reward = 91.00, steps = 91\n",
      "11:38:50 [DEBUG] train episode 191: reward = 70.00, steps = 70\n",
      "11:38:53 [DEBUG] train episode 192: reward = 73.00, steps = 73\n",
      "11:38:57 [DEBUG] train episode 193: reward = 81.00, steps = 81\n",
      "11:39:03 [DEBUG] train episode 194: reward = 139.00, steps = 139\n",
      "11:39:06 [DEBUG] train episode 195: reward = 63.00, steps = 63\n",
      "11:39:13 [DEBUG] train episode 196: reward = 132.00, steps = 132\n",
      "11:39:17 [DEBUG] train episode 197: reward = 82.00, steps = 82\n",
      "11:39:20 [DEBUG] train episode 198: reward = 81.00, steps = 81\n",
      "11:39:24 [DEBUG] train episode 199: reward = 78.00, steps = 78\n",
      "11:39:28 [DEBUG] train episode 200: reward = 93.00, steps = 93\n",
      "11:39:31 [DEBUG] train episode 201: reward = 70.00, steps = 70\n",
      "11:39:34 [DEBUG] train episode 202: reward = 63.00, steps = 63\n",
      "11:39:39 [DEBUG] train episode 203: reward = 95.00, steps = 95\n",
      "11:39:42 [DEBUG] train episode 204: reward = 70.00, steps = 70\n",
      "11:39:47 [DEBUG] train episode 205: reward = 104.00, steps = 104\n",
      "11:39:50 [DEBUG] train episode 206: reward = 66.00, steps = 66\n",
      "11:39:57 [DEBUG] train episode 207: reward = 160.00, steps = 160\n",
      "11:40:01 [DEBUG] train episode 208: reward = 95.00, steps = 95\n",
      "11:40:04 [DEBUG] train episode 209: reward = 54.00, steps = 54\n",
      "11:40:06 [DEBUG] train episode 210: reward = 53.00, steps = 53\n",
      "11:40:09 [DEBUG] train episode 211: reward = 58.00, steps = 58\n",
      "11:40:13 [DEBUG] train episode 212: reward = 95.00, steps = 95\n",
      "11:40:18 [DEBUG] train episode 213: reward = 89.00, steps = 89\n",
      "11:40:23 [DEBUG] train episode 214: reward = 115.00, steps = 115\n",
      "11:40:28 [DEBUG] train episode 215: reward = 112.00, steps = 112\n",
      "11:40:32 [DEBUG] train episode 216: reward = 92.00, steps = 92\n",
      "11:40:37 [DEBUG] train episode 217: reward = 104.00, steps = 104\n",
      "11:40:40 [DEBUG] train episode 218: reward = 71.00, steps = 71\n",
      "11:40:45 [DEBUG] train episode 219: reward = 104.00, steps = 104\n",
      "11:40:48 [DEBUG] train episode 220: reward = 67.00, steps = 67\n",
      "11:40:51 [DEBUG] train episode 221: reward = 68.00, steps = 68\n",
      "11:40:56 [DEBUG] train episode 222: reward = 100.00, steps = 100\n",
      "11:40:59 [DEBUG] train episode 223: reward = 63.00, steps = 63\n",
      "11:41:01 [DEBUG] train episode 224: reward = 44.00, steps = 44\n",
      "11:41:03 [DEBUG] train episode 225: reward = 51.00, steps = 51\n",
      "11:41:06 [DEBUG] train episode 226: reward = 63.00, steps = 63\n",
      "11:41:10 [DEBUG] train episode 227: reward = 83.00, steps = 83\n",
      "11:41:16 [DEBUG] train episode 228: reward = 122.00, steps = 122\n",
      "11:41:18 [DEBUG] train episode 229: reward = 60.00, steps = 60\n",
      "11:41:21 [DEBUG] train episode 230: reward = 66.00, steps = 66\n",
      "11:41:25 [DEBUG] train episode 231: reward = 82.00, steps = 82\n",
      "11:41:29 [DEBUG] train episode 232: reward = 75.00, steps = 75\n",
      "11:41:32 [DEBUG] train episode 233: reward = 67.00, steps = 67\n",
      "11:41:35 [DEBUG] train episode 234: reward = 77.00, steps = 77\n",
      "11:41:39 [DEBUG] train episode 235: reward = 74.00, steps = 74\n",
      "11:41:44 [DEBUG] train episode 236: reward = 113.00, steps = 113\n",
      "11:41:47 [DEBUG] train episode 237: reward = 66.00, steps = 66\n",
      "11:41:51 [DEBUG] train episode 238: reward = 83.00, steps = 83\n",
      "11:41:54 [DEBUG] train episode 239: reward = 66.00, steps = 66\n",
      "11:41:57 [DEBUG] train episode 240: reward = 69.00, steps = 69\n",
      "11:42:02 [DEBUG] train episode 241: reward = 101.00, steps = 101\n",
      "11:42:08 [DEBUG] train episode 242: reward = 133.00, steps = 133\n",
      "11:42:11 [DEBUG] train episode 243: reward = 72.00, steps = 72\n",
      "11:42:17 [DEBUG] train episode 244: reward = 120.00, steps = 120\n",
      "11:42:21 [DEBUG] train episode 245: reward = 104.00, steps = 104\n",
      "11:42:26 [DEBUG] train episode 246: reward = 97.00, steps = 97\n",
      "11:42:29 [DEBUG] train episode 247: reward = 73.00, steps = 73\n",
      "11:42:32 [DEBUG] train episode 248: reward = 55.00, steps = 55\n",
      "11:42:34 [DEBUG] train episode 249: reward = 62.00, steps = 62\n",
      "11:42:39 [DEBUG] train episode 250: reward = 88.00, steps = 88\n",
      "11:42:42 [DEBUG] train episode 251: reward = 73.00, steps = 73\n",
      "11:42:44 [DEBUG] train episode 252: reward = 50.00, steps = 50\n",
      "11:42:48 [DEBUG] train episode 253: reward = 79.00, steps = 79\n",
      "11:42:52 [DEBUG] train episode 254: reward = 88.00, steps = 88\n",
      "11:42:57 [DEBUG] train episode 255: reward = 100.00, steps = 100\n",
      "11:43:04 [DEBUG] train episode 256: reward = 174.00, steps = 174\n",
      "11:43:10 [DEBUG] train episode 257: reward = 121.00, steps = 121\n",
      "11:43:14 [DEBUG] train episode 258: reward = 75.00, steps = 75\n",
      "11:43:16 [DEBUG] train episode 259: reward = 42.00, steps = 42\n",
      "11:43:17 [DEBUG] train episode 260: reward = 41.00, steps = 41\n",
      "11:43:23 [DEBUG] train episode 261: reward = 123.00, steps = 123\n",
      "11:43:26 [DEBUG] train episode 262: reward = 58.00, steps = 58\n",
      "11:43:28 [DEBUG] train episode 263: reward = 58.00, steps = 58\n",
      "11:43:33 [DEBUG] train episode 264: reward = 105.00, steps = 105\n",
      "11:43:36 [DEBUG] train episode 265: reward = 61.00, steps = 61\n",
      "11:43:39 [DEBUG] train episode 266: reward = 63.00, steps = 63\n",
      "11:43:42 [DEBUG] train episode 267: reward = 74.00, steps = 74\n",
      "11:43:46 [DEBUG] train episode 268: reward = 71.00, steps = 71\n",
      "11:43:48 [DEBUG] train episode 269: reward = 56.00, steps = 56\n",
      "11:43:53 [DEBUG] train episode 270: reward = 113.00, steps = 113\n",
      "11:43:56 [DEBUG] train episode 271: reward = 60.00, steps = 60\n",
      "11:44:00 [DEBUG] train episode 272: reward = 77.00, steps = 77\n",
      "11:44:03 [DEBUG] train episode 273: reward = 67.00, steps = 67\n",
      "11:44:07 [DEBUG] train episode 274: reward = 78.00, steps = 78\n",
      "11:44:10 [DEBUG] train episode 275: reward = 68.00, steps = 68\n",
      "11:44:14 [DEBUG] train episode 276: reward = 91.00, steps = 91\n",
      "11:44:18 [DEBUG] train episode 277: reward = 84.00, steps = 84\n",
      "11:44:23 [DEBUG] train episode 278: reward = 112.00, steps = 112\n",
      "11:44:26 [DEBUG] train episode 279: reward = 74.00, steps = 74\n",
      "11:44:31 [DEBUG] train episode 280: reward = 94.00, steps = 94\n",
      "11:44:34 [DEBUG] train episode 281: reward = 75.00, steps = 75\n",
      "11:44:37 [DEBUG] train episode 282: reward = 75.00, steps = 75\n",
      "11:44:40 [DEBUG] train episode 283: reward = 56.00, steps = 56\n",
      "11:44:44 [DEBUG] train episode 284: reward = 81.00, steps = 81\n",
      "11:44:48 [DEBUG] train episode 285: reward = 79.00, steps = 79\n",
      "11:44:51 [DEBUG] train episode 286: reward = 64.00, steps = 64\n",
      "11:44:53 [DEBUG] train episode 287: reward = 46.00, steps = 46\n",
      "11:44:56 [DEBUG] train episode 288: reward = 61.00, steps = 61\n",
      "11:44:58 [DEBUG] train episode 289: reward = 52.00, steps = 52\n",
      "11:45:02 [DEBUG] train episode 290: reward = 79.00, steps = 79\n",
      "11:45:06 [DEBUG] train episode 291: reward = 97.00, steps = 97\n",
      "11:45:09 [DEBUG] train episode 292: reward = 71.00, steps = 71\n",
      "11:45:13 [DEBUG] train episode 293: reward = 81.00, steps = 81\n",
      "11:45:16 [DEBUG] train episode 294: reward = 63.00, steps = 63\n",
      "11:45:20 [DEBUG] train episode 295: reward = 80.00, steps = 80\n",
      "11:45:24 [DEBUG] train episode 296: reward = 100.00, steps = 100\n",
      "11:45:28 [DEBUG] train episode 297: reward = 80.00, steps = 80\n",
      "11:45:35 [DEBUG] train episode 298: reward = 150.00, steps = 150\n",
      "11:45:42 [DEBUG] train episode 299: reward = 163.00, steps = 163\n",
      "11:45:44 [DEBUG] train episode 300: reward = 47.00, steps = 47\n",
      "11:45:48 [DEBUG] train episode 301: reward = 86.00, steps = 86\n",
      "11:45:53 [DEBUG] train episode 302: reward = 106.00, steps = 106\n",
      "11:45:56 [DEBUG] train episode 303: reward = 68.00, steps = 68\n",
      "11:46:00 [DEBUG] train episode 304: reward = 82.00, steps = 82\n",
      "11:46:03 [DEBUG] train episode 305: reward = 66.00, steps = 66\n",
      "11:46:05 [DEBUG] train episode 306: reward = 46.00, steps = 46\n",
      "11:46:08 [DEBUG] train episode 307: reward = 69.00, steps = 69\n",
      "11:46:11 [DEBUG] train episode 308: reward = 58.00, steps = 58\n",
      "11:46:14 [DEBUG] train episode 309: reward = 60.00, steps = 60\n",
      "11:46:18 [DEBUG] train episode 310: reward = 87.00, steps = 87\n",
      "11:46:22 [DEBUG] train episode 311: reward = 88.00, steps = 88\n",
      "11:46:27 [DEBUG] train episode 312: reward = 117.00, steps = 117\n",
      "11:46:30 [DEBUG] train episode 313: reward = 52.00, steps = 52\n",
      "11:46:33 [DEBUG] train episode 314: reward = 82.00, steps = 82\n",
      "11:46:38 [DEBUG] train episode 315: reward = 95.00, steps = 95\n",
      "11:46:42 [DEBUG] train episode 316: reward = 85.00, steps = 85\n",
      "11:46:46 [DEBUG] train episode 317: reward = 86.00, steps = 86\n",
      "11:46:52 [DEBUG] train episode 318: reward = 145.00, steps = 145\n",
      "11:46:55 [DEBUG] train episode 319: reward = 62.00, steps = 62\n",
      "11:46:59 [DEBUG] train episode 320: reward = 88.00, steps = 88\n",
      "11:47:02 [DEBUG] train episode 321: reward = 53.00, steps = 53\n",
      "11:47:05 [DEBUG] train episode 322: reward = 64.00, steps = 64\n",
      "11:47:10 [DEBUG] train episode 323: reward = 112.00, steps = 112\n",
      "11:47:13 [DEBUG] train episode 324: reward = 74.00, steps = 74\n",
      "11:47:16 [DEBUG] train episode 325: reward = 58.00, steps = 58\n",
      "11:47:19 [DEBUG] train episode 326: reward = 65.00, steps = 65\n",
      "11:47:22 [DEBUG] train episode 327: reward = 71.00, steps = 71\n",
      "11:47:26 [DEBUG] train episode 328: reward = 87.00, steps = 87\n",
      "11:47:29 [DEBUG] train episode 329: reward = 61.00, steps = 61\n",
      "11:47:33 [DEBUG] train episode 330: reward = 86.00, steps = 86\n",
      "11:47:36 [DEBUG] train episode 331: reward = 73.00, steps = 73\n",
      "11:47:40 [DEBUG] train episode 332: reward = 86.00, steps = 86\n",
      "11:47:43 [DEBUG] train episode 333: reward = 54.00, steps = 54\n",
      "11:47:46 [DEBUG] train episode 334: reward = 62.00, steps = 62\n",
      "11:47:49 [DEBUG] train episode 335: reward = 70.00, steps = 70\n",
      "11:47:52 [DEBUG] train episode 336: reward = 60.00, steps = 60\n",
      "11:47:56 [DEBUG] train episode 337: reward = 82.00, steps = 82\n",
      "11:48:00 [DEBUG] train episode 338: reward = 97.00, steps = 97\n",
      "11:48:03 [DEBUG] train episode 339: reward = 70.00, steps = 70\n",
      "11:48:07 [DEBUG] train episode 340: reward = 83.00, steps = 83\n",
      "11:48:11 [DEBUG] train episode 341: reward = 91.00, steps = 91\n",
      "11:48:15 [DEBUG] train episode 342: reward = 84.00, steps = 84\n",
      "11:48:18 [DEBUG] train episode 343: reward = 72.00, steps = 72\n",
      "11:48:24 [DEBUG] train episode 344: reward = 119.00, steps = 119\n",
      "11:48:27 [DEBUG] train episode 345: reward = 59.00, steps = 59\n",
      "11:48:30 [DEBUG] train episode 346: reward = 82.00, steps = 82\n",
      "11:48:35 [DEBUG] train episode 347: reward = 94.00, steps = 94\n",
      "11:48:39 [DEBUG] train episode 348: reward = 103.00, steps = 103\n",
      "11:48:46 [DEBUG] train episode 349: reward = 149.00, steps = 149\n",
      "11:48:50 [DEBUG] train episode 350: reward = 82.00, steps = 82\n",
      "11:48:54 [DEBUG] train episode 351: reward = 77.00, steps = 77\n",
      "11:48:57 [DEBUG] train episode 352: reward = 78.00, steps = 78\n",
      "11:49:01 [DEBUG] train episode 353: reward = 75.00, steps = 75\n",
      "11:49:05 [DEBUG] train episode 354: reward = 89.00, steps = 89\n",
      "11:49:09 [DEBUG] train episode 355: reward = 81.00, steps = 81\n",
      "11:49:15 [DEBUG] train episode 356: reward = 137.00, steps = 137\n",
      "11:49:19 [DEBUG] train episode 357: reward = 105.00, steps = 105\n",
      "11:49:23 [DEBUG] train episode 358: reward = 88.00, steps = 88\n",
      "11:49:28 [DEBUG] train episode 359: reward = 102.00, steps = 102\n",
      "11:49:32 [DEBUG] train episode 360: reward = 85.00, steps = 85\n",
      "11:49:37 [DEBUG] train episode 361: reward = 106.00, steps = 106\n",
      "11:49:40 [DEBUG] train episode 362: reward = 74.00, steps = 74\n",
      "11:49:44 [DEBUG] train episode 363: reward = 79.00, steps = 79\n",
      "11:49:48 [DEBUG] train episode 364: reward = 78.00, steps = 78\n",
      "11:49:52 [DEBUG] train episode 365: reward = 93.00, steps = 93\n",
      "11:49:59 [DEBUG] train episode 366: reward = 147.00, steps = 147\n",
      "11:50:05 [DEBUG] train episode 367: reward = 149.00, steps = 149\n",
      "11:50:12 [DEBUG] train episode 368: reward = 140.00, steps = 140\n",
      "11:50:15 [DEBUG] train episode 369: reward = 71.00, steps = 71\n",
      "11:50:21 [DEBUG] train episode 370: reward = 123.00, steps = 123\n",
      "11:50:25 [DEBUG] train episode 371: reward = 99.00, steps = 99\n",
      "11:50:28 [DEBUG] train episode 372: reward = 71.00, steps = 71\n",
      "11:50:33 [DEBUG] train episode 373: reward = 110.00, steps = 110\n",
      "11:50:36 [DEBUG] train episode 374: reward = 65.00, steps = 65\n",
      "11:50:39 [DEBUG] train episode 375: reward = 61.00, steps = 61\n",
      "11:50:44 [DEBUG] train episode 376: reward = 96.00, steps = 96\n",
      "11:50:48 [DEBUG] train episode 377: reward = 100.00, steps = 100\n",
      "11:50:55 [DEBUG] train episode 378: reward = 148.00, steps = 148\n",
      "11:51:03 [DEBUG] train episode 379: reward = 166.00, steps = 166\n",
      "11:51:05 [DEBUG] train episode 380: reward = 59.00, steps = 59\n",
      "11:51:09 [DEBUG] train episode 381: reward = 84.00, steps = 84\n",
      "11:51:16 [DEBUG] train episode 382: reward = 150.00, steps = 150\n",
      "11:51:21 [DEBUG] train episode 383: reward = 100.00, steps = 100\n",
      "11:51:23 [DEBUG] train episode 384: reward = 65.00, steps = 65\n",
      "11:51:28 [DEBUG] train episode 385: reward = 92.00, steps = 92\n",
      "11:51:31 [DEBUG] train episode 386: reward = 66.00, steps = 66\n",
      "11:51:35 [DEBUG] train episode 387: reward = 101.00, steps = 101\n",
      "11:51:39 [DEBUG] train episode 388: reward = 86.00, steps = 86\n",
      "11:51:44 [DEBUG] train episode 389: reward = 92.00, steps = 92\n",
      "11:51:47 [DEBUG] train episode 390: reward = 73.00, steps = 73\n",
      "11:51:50 [DEBUG] train episode 391: reward = 60.00, steps = 60\n",
      "11:51:53 [DEBUG] train episode 392: reward = 71.00, steps = 71\n",
      "11:51:58 [DEBUG] train episode 393: reward = 109.00, steps = 109\n",
      "11:52:02 [DEBUG] train episode 394: reward = 95.00, steps = 95\n",
      "11:52:06 [DEBUG] train episode 395: reward = 74.00, steps = 74\n",
      "11:52:11 [DEBUG] train episode 396: reward = 117.00, steps = 117\n",
      "11:52:16 [DEBUG] train episode 397: reward = 110.00, steps = 110\n",
      "11:52:20 [DEBUG] train episode 398: reward = 89.00, steps = 89\n",
      "11:52:25 [DEBUG] train episode 399: reward = 103.00, steps = 103\n",
      "11:52:30 [DEBUG] train episode 400: reward = 107.00, steps = 107\n",
      "11:52:35 [DEBUG] train episode 401: reward = 111.00, steps = 111\n",
      "11:52:38 [DEBUG] train episode 402: reward = 79.00, steps = 79\n",
      "11:52:42 [DEBUG] train episode 403: reward = 75.00, steps = 75\n",
      "11:52:46 [DEBUG] train episode 404: reward = 95.00, steps = 95\n",
      "11:52:52 [DEBUG] train episode 405: reward = 133.00, steps = 133\n",
      "11:52:57 [DEBUG] train episode 406: reward = 100.00, steps = 100\n",
      "11:53:02 [DEBUG] train episode 407: reward = 101.00, steps = 101\n",
      "11:53:06 [DEBUG] train episode 408: reward = 105.00, steps = 105\n",
      "11:53:10 [DEBUG] train episode 409: reward = 88.00, steps = 88\n",
      "11:53:15 [DEBUG] train episode 410: reward = 110.00, steps = 110\n",
      "11:53:19 [DEBUG] train episode 411: reward = 81.00, steps = 81\n",
      "11:53:25 [DEBUG] train episode 412: reward = 121.00, steps = 121\n",
      "11:53:28 [DEBUG] train episode 413: reward = 76.00, steps = 76\n",
      "11:53:33 [DEBUG] train episode 414: reward = 103.00, steps = 103\n",
      "11:53:36 [DEBUG] train episode 415: reward = 66.00, steps = 66\n",
      "11:53:40 [DEBUG] train episode 416: reward = 91.00, steps = 91\n",
      "11:53:44 [DEBUG] train episode 417: reward = 83.00, steps = 83\n",
      "11:53:49 [DEBUG] train episode 418: reward = 110.00, steps = 110\n",
      "11:53:58 [DEBUG] train episode 419: reward = 200.00, steps = 200\n",
      "11:54:02 [DEBUG] train episode 420: reward = 94.00, steps = 94\n",
      "11:54:06 [DEBUG] train episode 421: reward = 76.00, steps = 76\n",
      "11:54:10 [DEBUG] train episode 422: reward = 92.00, steps = 92\n",
      "11:54:14 [DEBUG] train episode 423: reward = 82.00, steps = 82\n",
      "11:54:16 [DEBUG] train episode 424: reward = 58.00, steps = 58\n",
      "11:54:21 [DEBUG] train episode 425: reward = 99.00, steps = 99\n",
      "11:54:26 [DEBUG] train episode 426: reward = 104.00, steps = 104\n",
      "11:54:30 [DEBUG] train episode 427: reward = 74.00, steps = 74\n",
      "11:54:32 [DEBUG] train episode 428: reward = 59.00, steps = 59\n",
      "11:54:35 [DEBUG] train episode 429: reward = 58.00, steps = 58\n",
      "11:54:40 [DEBUG] train episode 430: reward = 97.00, steps = 97\n",
      "11:54:45 [DEBUG] train episode 431: reward = 109.00, steps = 109\n",
      "11:54:51 [DEBUG] train episode 432: reward = 126.00, steps = 126\n",
      "11:54:55 [DEBUG] train episode 433: reward = 88.00, steps = 88\n",
      "11:54:59 [DEBUG] train episode 434: reward = 95.00, steps = 95\n",
      "11:55:04 [DEBUG] train episode 435: reward = 116.00, steps = 116\n",
      "11:55:08 [DEBUG] train episode 436: reward = 77.00, steps = 77\n",
      "11:55:16 [DEBUG] train episode 437: reward = 172.00, steps = 172\n",
      "11:55:19 [DEBUG] train episode 438: reward = 82.00, steps = 82\n",
      "11:55:23 [DEBUG] train episode 439: reward = 86.00, steps = 86\n",
      "11:55:29 [DEBUG] train episode 440: reward = 128.00, steps = 128\n",
      "11:55:36 [DEBUG] train episode 441: reward = 142.00, steps = 142\n",
      "11:55:40 [DEBUG] train episode 442: reward = 99.00, steps = 99\n",
      "11:55:44 [DEBUG] train episode 443: reward = 84.00, steps = 84\n",
      "11:55:50 [DEBUG] train episode 444: reward = 121.00, steps = 121\n",
      "11:55:54 [DEBUG] train episode 445: reward = 95.00, steps = 95\n",
      "11:55:57 [DEBUG] train episode 446: reward = 65.00, steps = 65\n",
      "11:56:01 [DEBUG] train episode 447: reward = 75.00, steps = 75\n",
      "11:56:05 [DEBUG] train episode 448: reward = 104.00, steps = 104\n",
      "11:56:10 [DEBUG] train episode 449: reward = 100.00, steps = 100\n",
      "11:56:15 [DEBUG] train episode 450: reward = 106.00, steps = 106\n",
      "11:56:18 [DEBUG] train episode 451: reward = 67.00, steps = 67\n",
      "11:56:23 [DEBUG] train episode 452: reward = 105.00, steps = 105\n",
      "11:56:26 [DEBUG] train episode 453: reward = 63.00, steps = 63\n",
      "11:56:30 [DEBUG] train episode 454: reward = 93.00, steps = 93\n",
      "11:56:34 [DEBUG] train episode 455: reward = 91.00, steps = 91\n",
      "11:56:39 [DEBUG] train episode 456: reward = 106.00, steps = 106\n",
      "11:56:44 [DEBUG] train episode 457: reward = 114.00, steps = 114\n",
      "11:56:48 [DEBUG] train episode 458: reward = 80.00, steps = 80\n",
      "11:56:53 [DEBUG] train episode 459: reward = 116.00, steps = 116\n",
      "11:56:56 [DEBUG] train episode 460: reward = 76.00, steps = 76\n",
      "11:57:01 [DEBUG] train episode 461: reward = 97.00, steps = 97\n",
      "11:57:05 [DEBUG] train episode 462: reward = 87.00, steps = 87\n",
      "11:57:12 [DEBUG] train episode 463: reward = 144.00, steps = 144\n",
      "11:57:15 [DEBUG] train episode 464: reward = 73.00, steps = 73\n",
      "11:57:20 [DEBUG] train episode 465: reward = 118.00, steps = 118\n",
      "11:57:24 [DEBUG] train episode 466: reward = 82.00, steps = 82\n",
      "11:57:28 [DEBUG] train episode 467: reward = 88.00, steps = 88\n",
      "11:57:34 [DEBUG] train episode 468: reward = 119.00, steps = 119\n",
      "11:57:39 [DEBUG] train episode 469: reward = 116.00, steps = 116\n",
      "11:57:44 [DEBUG] train episode 470: reward = 109.00, steps = 109\n",
      "11:57:48 [DEBUG] train episode 471: reward = 84.00, steps = 84\n",
      "11:57:57 [DEBUG] train episode 472: reward = 200.00, steps = 200\n",
      "11:58:02 [DEBUG] train episode 473: reward = 107.00, steps = 107\n",
      "11:58:08 [DEBUG] train episode 474: reward = 130.00, steps = 130\n",
      "11:58:11 [DEBUG] train episode 475: reward = 57.00, steps = 57\n",
      "11:58:13 [DEBUG] train episode 476: reward = 56.00, steps = 56\n",
      "11:58:18 [DEBUG] train episode 477: reward = 100.00, steps = 100\n",
      "11:58:23 [DEBUG] train episode 478: reward = 110.00, steps = 110\n",
      "11:58:27 [DEBUG] train episode 479: reward = 92.00, steps = 92\n",
      "11:58:31 [DEBUG] train episode 480: reward = 86.00, steps = 86\n",
      "11:58:36 [DEBUG] train episode 481: reward = 121.00, steps = 121\n",
      "11:58:41 [DEBUG] train episode 482: reward = 97.00, steps = 97\n",
      "11:58:45 [DEBUG] train episode 483: reward = 86.00, steps = 86\n",
      "11:58:51 [DEBUG] train episode 484: reward = 131.00, steps = 131\n",
      "11:58:54 [DEBUG] train episode 485: reward = 65.00, steps = 65\n",
      "11:58:58 [DEBUG] train episode 486: reward = 95.00, steps = 95\n",
      "11:59:03 [DEBUG] train episode 487: reward = 96.00, steps = 96\n",
      "11:59:06 [DEBUG] train episode 488: reward = 73.00, steps = 73\n",
      "11:59:09 [DEBUG] train episode 489: reward = 66.00, steps = 66\n",
      "11:59:16 [DEBUG] train episode 490: reward = 137.00, steps = 137\n",
      "11:59:20 [DEBUG] train episode 491: reward = 86.00, steps = 86\n",
      "11:59:23 [DEBUG] train episode 492: reward = 69.00, steps = 69\n",
      "11:59:28 [DEBUG] train episode 493: reward = 119.00, steps = 119\n",
      "11:59:33 [DEBUG] train episode 494: reward = 112.00, steps = 112\n",
      "11:59:37 [DEBUG] train episode 495: reward = 89.00, steps = 89\n",
      "11:59:42 [DEBUG] train episode 496: reward = 92.00, steps = 92\n",
      "11:59:45 [DEBUG] train episode 497: reward = 80.00, steps = 80\n",
      "11:59:49 [DEBUG] train episode 498: reward = 80.00, steps = 80\n",
      "11:59:52 [DEBUG] train episode 499: reward = 74.00, steps = 74\n",
      "11:59:56 [DEBUG] train episode 500: reward = 72.00, steps = 72\n",
      "12:00:00 [DEBUG] train episode 501: reward = 99.00, steps = 99\n",
      "12:00:04 [DEBUG] train episode 502: reward = 83.00, steps = 83\n",
      "12:00:11 [DEBUG] train episode 503: reward = 155.00, steps = 155\n",
      "12:00:16 [DEBUG] train episode 504: reward = 100.00, steps = 100\n",
      "12:00:21 [DEBUG] train episode 505: reward = 107.00, steps = 107\n",
      "12:00:26 [DEBUG] train episode 506: reward = 99.00, steps = 99\n",
      "12:00:29 [DEBUG] train episode 507: reward = 66.00, steps = 66\n",
      "12:00:32 [DEBUG] train episode 508: reward = 81.00, steps = 81\n",
      "12:00:39 [DEBUG] train episode 509: reward = 131.00, steps = 131\n",
      "12:00:42 [DEBUG] train episode 510: reward = 74.00, steps = 74\n",
      "12:00:47 [DEBUG] train episode 511: reward = 111.00, steps = 111\n",
      "12:00:52 [DEBUG] train episode 512: reward = 90.00, steps = 90\n",
      "12:00:56 [DEBUG] train episode 513: reward = 88.00, steps = 88\n",
      "12:01:00 [DEBUG] train episode 514: reward = 92.00, steps = 92\n",
      "12:01:04 [DEBUG] train episode 515: reward = 89.00, steps = 89\n",
      "12:01:07 [DEBUG] train episode 516: reward = 59.00, steps = 59\n",
      "12:01:11 [DEBUG] train episode 517: reward = 82.00, steps = 82\n",
      "12:01:15 [DEBUG] train episode 518: reward = 77.00, steps = 77\n",
      "12:01:19 [DEBUG] train episode 519: reward = 90.00, steps = 90\n",
      "12:01:24 [DEBUG] train episode 520: reward = 116.00, steps = 116\n",
      "12:01:27 [DEBUG] train episode 521: reward = 69.00, steps = 69\n",
      "12:01:33 [DEBUG] train episode 522: reward = 124.00, steps = 124\n",
      "12:01:41 [DEBUG] train episode 523: reward = 169.00, steps = 169\n",
      "12:01:46 [DEBUG] train episode 524: reward = 104.00, steps = 104\n",
      "12:01:49 [DEBUG] train episode 525: reward = 76.00, steps = 76\n",
      "12:01:53 [DEBUG] train episode 526: reward = 75.00, steps = 75\n",
      "12:01:58 [DEBUG] train episode 527: reward = 119.00, steps = 119\n",
      "12:02:03 [DEBUG] train episode 528: reward = 96.00, steps = 96\n",
      "12:02:09 [DEBUG] train episode 529: reward = 135.00, steps = 135\n",
      "12:02:13 [DEBUG] train episode 530: reward = 86.00, steps = 86\n",
      "12:02:19 [DEBUG] train episode 531: reward = 149.00, steps = 149\n",
      "12:02:25 [DEBUG] train episode 532: reward = 131.00, steps = 131\n",
      "12:02:29 [DEBUG] train episode 533: reward = 76.00, steps = 76\n",
      "12:02:38 [DEBUG] train episode 534: reward = 200.00, steps = 200\n",
      "12:02:42 [DEBUG] train episode 535: reward = 89.00, steps = 89\n",
      "12:02:47 [DEBUG] train episode 536: reward = 102.00, steps = 102\n",
      "12:02:51 [DEBUG] train episode 537: reward = 101.00, steps = 101\n",
      "12:02:55 [DEBUG] train episode 538: reward = 79.00, steps = 79\n",
      "12:02:59 [DEBUG] train episode 539: reward = 91.00, steps = 91\n",
      "12:03:03 [DEBUG] train episode 540: reward = 79.00, steps = 79\n",
      "12:03:09 [DEBUG] train episode 541: reward = 129.00, steps = 129\n",
      "12:03:13 [DEBUG] train episode 542: reward = 101.00, steps = 101\n",
      "12:03:19 [DEBUG] train episode 543: reward = 136.00, steps = 136\n",
      "12:03:24 [DEBUG] train episode 544: reward = 97.00, steps = 97\n",
      "12:03:30 [DEBUG] train episode 545: reward = 132.00, steps = 132\n",
      "12:03:35 [DEBUG] train episode 546: reward = 118.00, steps = 118\n",
      "12:03:41 [DEBUG] train episode 547: reward = 136.00, steps = 136\n",
      "12:03:47 [DEBUG] train episode 548: reward = 121.00, steps = 121\n",
      "12:03:50 [DEBUG] train episode 549: reward = 75.00, steps = 75\n",
      "12:03:56 [DEBUG] train episode 550: reward = 130.00, steps = 130\n",
      "12:04:00 [DEBUG] train episode 551: reward = 84.00, steps = 84\n",
      "12:04:05 [DEBUG] train episode 552: reward = 105.00, steps = 105\n",
      "12:04:11 [DEBUG] train episode 553: reward = 147.00, steps = 147\n",
      "12:04:18 [DEBUG] train episode 554: reward = 141.00, steps = 141\n",
      "12:04:22 [DEBUG] train episode 555: reward = 81.00, steps = 81\n",
      "12:04:26 [DEBUG] train episode 556: reward = 95.00, steps = 95\n",
      "12:04:31 [DEBUG] train episode 557: reward = 106.00, steps = 106\n",
      "12:04:38 [DEBUG] train episode 558: reward = 150.00, steps = 150\n",
      "12:04:41 [DEBUG] train episode 559: reward = 81.00, steps = 81\n",
      "12:04:46 [DEBUG] train episode 560: reward = 100.00, steps = 100\n",
      "12:04:52 [DEBUG] train episode 561: reward = 138.00, steps = 138\n",
      "12:04:59 [DEBUG] train episode 562: reward = 136.00, steps = 136\n",
      "12:05:03 [DEBUG] train episode 563: reward = 96.00, steps = 96\n",
      "12:05:06 [DEBUG] train episode 564: reward = 73.00, steps = 73\n",
      "12:05:15 [DEBUG] train episode 565: reward = 200.00, steps = 200\n",
      "12:05:19 [DEBUG] train episode 566: reward = 78.00, steps = 78\n",
      "12:05:27 [DEBUG] train episode 567: reward = 183.00, steps = 183\n",
      "12:05:34 [DEBUG] train episode 568: reward = 158.00, steps = 158\n",
      "12:05:38 [DEBUG] train episode 569: reward = 82.00, steps = 82\n",
      "12:05:45 [DEBUG] train episode 570: reward = 136.00, steps = 136\n",
      "12:05:48 [DEBUG] train episode 571: reward = 69.00, steps = 69\n",
      "12:05:53 [DEBUG] train episode 572: reward = 101.00, steps = 101\n",
      "12:05:59 [DEBUG] train episode 573: reward = 128.00, steps = 128\n",
      "12:06:04 [DEBUG] train episode 574: reward = 114.00, steps = 114\n",
      "12:06:08 [DEBUG] train episode 575: reward = 84.00, steps = 84\n",
      "12:06:10 [DEBUG] train episode 576: reward = 60.00, steps = 60\n",
      "12:06:15 [DEBUG] train episode 577: reward = 99.00, steps = 99\n",
      "12:06:20 [DEBUG] train episode 578: reward = 117.00, steps = 117\n",
      "12:06:25 [DEBUG] train episode 579: reward = 95.00, steps = 95\n",
      "12:06:31 [DEBUG] train episode 580: reward = 143.00, steps = 143\n",
      "12:06:35 [DEBUG] train episode 581: reward = 82.00, steps = 82\n",
      "12:06:42 [DEBUG] train episode 582: reward = 163.00, steps = 163\n",
      "12:06:50 [DEBUG] train episode 583: reward = 176.00, steps = 176\n",
      "12:06:55 [DEBUG] train episode 584: reward = 98.00, steps = 98\n",
      "12:06:58 [DEBUG] train episode 585: reward = 79.00, steps = 79\n",
      "12:07:06 [DEBUG] train episode 586: reward = 166.00, steps = 166\n",
      "12:07:12 [DEBUG] train episode 587: reward = 136.00, steps = 136\n",
      "12:07:16 [DEBUG] train episode 588: reward = 83.00, steps = 83\n",
      "12:07:25 [DEBUG] train episode 589: reward = 200.00, steps = 200\n",
      "12:07:30 [DEBUG] train episode 590: reward = 101.00, steps = 101\n",
      "12:07:36 [DEBUG] train episode 591: reward = 129.00, steps = 129\n",
      "12:07:42 [DEBUG] train episode 592: reward = 134.00, steps = 134\n",
      "12:07:47 [DEBUG] train episode 593: reward = 124.00, steps = 124\n",
      "12:07:53 [DEBUG] train episode 594: reward = 122.00, steps = 122\n",
      "12:07:58 [DEBUG] train episode 595: reward = 106.00, steps = 106\n",
      "12:08:02 [DEBUG] train episode 596: reward = 103.00, steps = 103\n",
      "12:08:07 [DEBUG] train episode 597: reward = 100.00, steps = 100\n",
      "12:08:12 [DEBUG] train episode 598: reward = 104.00, steps = 104\n",
      "12:08:16 [DEBUG] train episode 599: reward = 89.00, steps = 89\n",
      "12:08:21 [DEBUG] train episode 600: reward = 105.00, steps = 105\n",
      "12:08:25 [DEBUG] train episode 601: reward = 96.00, steps = 96\n",
      "12:08:31 [DEBUG] train episode 602: reward = 123.00, steps = 123\n",
      "12:08:35 [DEBUG] train episode 603: reward = 96.00, steps = 96\n",
      "12:08:39 [DEBUG] train episode 604: reward = 96.00, steps = 96\n",
      "12:08:46 [DEBUG] train episode 605: reward = 142.00, steps = 142\n",
      "12:08:52 [DEBUG] train episode 606: reward = 138.00, steps = 138\n",
      "12:08:57 [DEBUG] train episode 607: reward = 101.00, steps = 101\n",
      "12:09:05 [DEBUG] train episode 608: reward = 184.00, steps = 184\n",
      "12:09:09 [DEBUG] train episode 609: reward = 76.00, steps = 76\n",
      "12:09:13 [DEBUG] train episode 610: reward = 93.00, steps = 93\n",
      "12:09:19 [DEBUG] train episode 611: reward = 130.00, steps = 130\n",
      "12:09:25 [DEBUG] train episode 612: reward = 129.00, steps = 129\n",
      "12:09:31 [DEBUG] train episode 613: reward = 133.00, steps = 133\n",
      "12:09:35 [DEBUG] train episode 614: reward = 98.00, steps = 98\n",
      "12:09:39 [DEBUG] train episode 615: reward = 86.00, steps = 86\n",
      "12:09:46 [DEBUG] train episode 616: reward = 146.00, steps = 146\n",
      "12:09:50 [DEBUG] train episode 617: reward = 71.00, steps = 71\n",
      "12:09:54 [DEBUG] train episode 618: reward = 82.00, steps = 82\n",
      "12:10:00 [DEBUG] train episode 619: reward = 142.00, steps = 142\n",
      "12:10:04 [DEBUG] train episode 620: reward = 87.00, steps = 87\n",
      "12:10:12 [DEBUG] train episode 621: reward = 165.00, steps = 165\n",
      "12:10:15 [DEBUG] train episode 622: reward = 86.00, steps = 86\n",
      "12:10:19 [DEBUG] train episode 623: reward = 84.00, steps = 84\n",
      "12:10:25 [DEBUG] train episode 624: reward = 117.00, steps = 117\n",
      "12:10:29 [DEBUG] train episode 625: reward = 88.00, steps = 88\n",
      "12:10:32 [DEBUG] train episode 626: reward = 79.00, steps = 79\n",
      "12:10:38 [DEBUG] train episode 627: reward = 119.00, steps = 119\n",
      "12:10:42 [DEBUG] train episode 628: reward = 88.00, steps = 88\n",
      "12:10:48 [DEBUG] train episode 629: reward = 125.00, steps = 125\n",
      "12:10:51 [DEBUG] train episode 630: reward = 70.00, steps = 70\n",
      "12:10:56 [DEBUG] train episode 631: reward = 113.00, steps = 113\n",
      "12:11:05 [DEBUG] train episode 632: reward = 188.00, steps = 188\n",
      "12:11:10 [DEBUG] train episode 633: reward = 117.00, steps = 117\n",
      "12:11:14 [DEBUG] train episode 634: reward = 94.00, steps = 94\n",
      "12:11:21 [DEBUG] train episode 635: reward = 148.00, steps = 148\n",
      "12:11:26 [DEBUG] train episode 636: reward = 112.00, steps = 112\n",
      "12:11:31 [DEBUG] train episode 637: reward = 117.00, steps = 117\n",
      "12:11:37 [DEBUG] train episode 638: reward = 125.00, steps = 125\n",
      "12:11:41 [DEBUG] train episode 639: reward = 87.00, steps = 87\n",
      "12:11:45 [DEBUG] train episode 640: reward = 95.00, steps = 95\n",
      "12:11:49 [DEBUG] train episode 641: reward = 81.00, steps = 81\n",
      "12:11:54 [DEBUG] train episode 642: reward = 100.00, steps = 100\n",
      "12:12:00 [DEBUG] train episode 643: reward = 143.00, steps = 143\n",
      "12:12:04 [DEBUG] train episode 644: reward = 79.00, steps = 79\n",
      "12:12:11 [DEBUG] train episode 645: reward = 153.00, steps = 153\n",
      "12:12:18 [DEBUG] train episode 646: reward = 166.00, steps = 166\n",
      "12:12:22 [DEBUG] train episode 647: reward = 92.00, steps = 92\n",
      "12:12:28 [DEBUG] train episode 648: reward = 129.00, steps = 129\n",
      "12:12:33 [DEBUG] train episode 649: reward = 112.00, steps = 112\n",
      "12:12:37 [DEBUG] train episode 650: reward = 90.00, steps = 90\n",
      "12:12:41 [DEBUG] train episode 651: reward = 75.00, steps = 75\n",
      "12:12:48 [DEBUG] train episode 652: reward = 143.00, steps = 143\n",
      "12:12:53 [DEBUG] train episode 653: reward = 108.00, steps = 108\n",
      "12:12:58 [DEBUG] train episode 654: reward = 118.00, steps = 118\n",
      "12:13:03 [DEBUG] train episode 655: reward = 101.00, steps = 101\n",
      "12:13:11 [DEBUG] train episode 656: reward = 160.00, steps = 160\n",
      "12:13:17 [DEBUG] train episode 657: reward = 147.00, steps = 147\n",
      "12:13:22 [DEBUG] train episode 658: reward = 96.00, steps = 96\n",
      "12:13:26 [DEBUG] train episode 659: reward = 93.00, steps = 93\n",
      "12:13:33 [DEBUG] train episode 660: reward = 160.00, steps = 160\n",
      "12:13:38 [DEBUG] train episode 661: reward = 95.00, steps = 95\n",
      "12:13:44 [DEBUG] train episode 662: reward = 145.00, steps = 145\n",
      "12:13:53 [DEBUG] train episode 663: reward = 200.00, steps = 200\n",
      "12:13:57 [DEBUG] train episode 664: reward = 92.00, steps = 92\n",
      "12:14:02 [DEBUG] train episode 665: reward = 95.00, steps = 95\n",
      "12:14:05 [DEBUG] train episode 666: reward = 79.00, steps = 79\n",
      "12:14:10 [DEBUG] train episode 667: reward = 106.00, steps = 106\n",
      "12:14:15 [DEBUG] train episode 668: reward = 98.00, steps = 98\n",
      "12:14:22 [DEBUG] train episode 669: reward = 163.00, steps = 163\n",
      "12:14:25 [DEBUG] train episode 670: reward = 71.00, steps = 71\n",
      "12:14:31 [DEBUG] train episode 671: reward = 134.00, steps = 134\n",
      "12:14:37 [DEBUG] train episode 672: reward = 117.00, steps = 117\n",
      "12:14:43 [DEBUG] train episode 673: reward = 138.00, steps = 138\n",
      "12:14:48 [DEBUG] train episode 674: reward = 115.00, steps = 115\n",
      "12:14:55 [DEBUG] train episode 675: reward = 141.00, steps = 141\n",
      "12:14:59 [DEBUG] train episode 676: reward = 84.00, steps = 84\n",
      "12:15:04 [DEBUG] train episode 677: reward = 126.00, steps = 126\n",
      "12:15:10 [DEBUG] train episode 678: reward = 121.00, steps = 121\n",
      "12:15:15 [DEBUG] train episode 679: reward = 105.00, steps = 105\n",
      "12:15:19 [DEBUG] train episode 680: reward = 84.00, steps = 84\n",
      "12:15:24 [DEBUG] train episode 681: reward = 112.00, steps = 112\n",
      "12:15:28 [DEBUG] train episode 682: reward = 101.00, steps = 101\n",
      "12:15:32 [DEBUG] train episode 683: reward = 78.00, steps = 78\n",
      "12:15:38 [DEBUG] train episode 684: reward = 139.00, steps = 139\n",
      "12:15:44 [DEBUG] train episode 685: reward = 135.00, steps = 135\n",
      "12:15:51 [DEBUG] train episode 686: reward = 148.00, steps = 148\n",
      "12:15:58 [DEBUG] train episode 687: reward = 152.00, steps = 152\n",
      "12:16:03 [DEBUG] train episode 688: reward = 119.00, steps = 119\n",
      "12:16:08 [DEBUG] train episode 689: reward = 108.00, steps = 108\n",
      "12:16:15 [DEBUG] train episode 690: reward = 145.00, steps = 145\n",
      "12:16:20 [DEBUG] train episode 691: reward = 113.00, steps = 113\n",
      "12:16:26 [DEBUG] train episode 692: reward = 128.00, steps = 128\n",
      "12:16:31 [DEBUG] train episode 693: reward = 104.00, steps = 104\n",
      "12:16:35 [DEBUG] train episode 694: reward = 93.00, steps = 93\n",
      "12:16:38 [DEBUG] train episode 695: reward = 78.00, steps = 78\n",
      "12:16:42 [DEBUG] train episode 696: reward = 75.00, steps = 75\n",
      "12:16:46 [DEBUG] train episode 697: reward = 93.00, steps = 93\n",
      "12:16:54 [DEBUG] train episode 698: reward = 179.00, steps = 179\n",
      "12:16:59 [DEBUG] train episode 699: reward = 115.00, steps = 115\n",
      "12:17:07 [DEBUG] train episode 700: reward = 167.00, steps = 167\n",
      "12:17:16 [DEBUG] train episode 701: reward = 192.00, steps = 192\n",
      "12:17:20 [DEBUG] train episode 702: reward = 100.00, steps = 100\n",
      "12:17:29 [DEBUG] train episode 703: reward = 200.00, steps = 200\n",
      "12:17:35 [DEBUG] train episode 704: reward = 122.00, steps = 122\n",
      "12:17:44 [DEBUG] train episode 705: reward = 200.00, steps = 200\n",
      "12:17:49 [DEBUG] train episode 706: reward = 118.00, steps = 118\n",
      "12:17:54 [DEBUG] train episode 707: reward = 117.00, steps = 117\n",
      "12:17:59 [DEBUG] train episode 708: reward = 91.00, steps = 91\n",
      "12:18:03 [DEBUG] train episode 709: reward = 89.00, steps = 89\n",
      "12:18:06 [DEBUG] train episode 710: reward = 80.00, steps = 80\n",
      "12:18:15 [DEBUG] train episode 711: reward = 186.00, steps = 186\n",
      "12:18:20 [DEBUG] train episode 712: reward = 125.00, steps = 125\n",
      "12:18:30 [DEBUG] train episode 713: reward = 200.00, steps = 200\n",
      "12:18:33 [DEBUG] train episode 714: reward = 89.00, steps = 89\n",
      "12:18:41 [DEBUG] train episode 715: reward = 170.00, steps = 170\n",
      "12:18:46 [DEBUG] train episode 716: reward = 103.00, steps = 103\n",
      "12:18:55 [DEBUG] train episode 717: reward = 193.00, steps = 193\n",
      "12:19:01 [DEBUG] train episode 718: reward = 132.00, steps = 132\n",
      "12:19:07 [DEBUG] train episode 719: reward = 138.00, steps = 138\n",
      "12:19:12 [DEBUG] train episode 720: reward = 119.00, steps = 119\n",
      "12:19:20 [DEBUG] train episode 721: reward = 173.00, steps = 173\n",
      "12:19:27 [DEBUG] train episode 722: reward = 158.00, steps = 158\n",
      "12:19:33 [DEBUG] train episode 723: reward = 122.00, steps = 122\n",
      "12:19:38 [DEBUG] train episode 724: reward = 100.00, steps = 100\n",
      "12:19:44 [DEBUG] train episode 725: reward = 133.00, steps = 133\n",
      "12:19:49 [DEBUG] train episode 726: reward = 124.00, steps = 124\n",
      "12:19:56 [DEBUG] train episode 727: reward = 148.00, steps = 148\n",
      "12:20:03 [DEBUG] train episode 728: reward = 163.00, steps = 163\n",
      "12:20:08 [DEBUG] train episode 729: reward = 104.00, steps = 104\n",
      "12:20:17 [DEBUG] train episode 730: reward = 193.00, steps = 193\n",
      "12:20:22 [DEBUG] train episode 731: reward = 106.00, steps = 106\n",
      "12:20:27 [DEBUG] train episode 732: reward = 125.00, steps = 125\n",
      "12:20:34 [DEBUG] train episode 733: reward = 154.00, steps = 154\n",
      "12:20:39 [DEBUG] train episode 734: reward = 101.00, steps = 101\n",
      "12:20:47 [DEBUG] train episode 735: reward = 166.00, steps = 166\n",
      "12:20:54 [DEBUG] train episode 736: reward = 155.00, steps = 155\n",
      "12:20:58 [DEBUG] train episode 737: reward = 105.00, steps = 105\n",
      "12:21:03 [DEBUG] train episode 738: reward = 106.00, steps = 106\n",
      "12:21:08 [DEBUG] train episode 739: reward = 113.00, steps = 113\n",
      "12:21:12 [DEBUG] train episode 740: reward = 86.00, steps = 86\n",
      "12:21:19 [DEBUG] train episode 741: reward = 140.00, steps = 140\n",
      "12:21:26 [DEBUG] train episode 742: reward = 155.00, steps = 155\n",
      "12:21:33 [DEBUG] train episode 743: reward = 172.00, steps = 172\n",
      "12:21:39 [DEBUG] train episode 744: reward = 125.00, steps = 125\n",
      "12:21:47 [DEBUG] train episode 745: reward = 180.00, steps = 180\n",
      "12:21:53 [DEBUG] train episode 746: reward = 120.00, steps = 120\n",
      "12:22:00 [DEBUG] train episode 747: reward = 163.00, steps = 163\n",
      "12:22:06 [DEBUG] train episode 748: reward = 138.00, steps = 138\n",
      "12:22:11 [DEBUG] train episode 749: reward = 93.00, steps = 93\n",
      "12:22:18 [DEBUG] train episode 750: reward = 165.00, steps = 165\n",
      "12:22:24 [DEBUG] train episode 751: reward = 135.00, steps = 135\n",
      "12:22:33 [DEBUG] train episode 752: reward = 200.00, steps = 200\n",
      "12:22:41 [DEBUG] train episode 753: reward = 159.00, steps = 159\n",
      "12:22:49 [DEBUG] train episode 754: reward = 185.00, steps = 185\n",
      "12:22:58 [DEBUG] train episode 755: reward = 200.00, steps = 200\n",
      "12:23:06 [DEBUG] train episode 756: reward = 180.00, steps = 180\n",
      "12:23:12 [DEBUG] train episode 757: reward = 125.00, steps = 125\n",
      "12:23:16 [DEBUG] train episode 758: reward = 101.00, steps = 101\n",
      "12:23:21 [DEBUG] train episode 759: reward = 105.00, steps = 105\n",
      "12:23:29 [DEBUG] train episode 760: reward = 168.00, steps = 168\n",
      "12:23:33 [DEBUG] train episode 761: reward = 102.00, steps = 102\n",
      "12:23:39 [DEBUG] train episode 762: reward = 128.00, steps = 128\n",
      "12:23:48 [DEBUG] train episode 763: reward = 200.00, steps = 200\n",
      "12:23:55 [DEBUG] train episode 764: reward = 144.00, steps = 144\n",
      "12:24:02 [DEBUG] train episode 765: reward = 154.00, steps = 154\n",
      "12:24:11 [DEBUG] train episode 766: reward = 195.00, steps = 195\n",
      "12:24:17 [DEBUG] train episode 767: reward = 148.00, steps = 148\n",
      "12:24:24 [DEBUG] train episode 768: reward = 149.00, steps = 149\n",
      "12:24:30 [DEBUG] train episode 769: reward = 128.00, steps = 128\n",
      "12:24:36 [DEBUG] train episode 770: reward = 126.00, steps = 126\n",
      "12:24:42 [DEBUG] train episode 771: reward = 150.00, steps = 150\n",
      "12:24:48 [DEBUG] train episode 772: reward = 127.00, steps = 127\n",
      "12:24:58 [DEBUG] train episode 773: reward = 200.00, steps = 200\n",
      "12:25:05 [DEBUG] train episode 774: reward = 162.00, steps = 162\n",
      "12:25:15 [DEBUG] train episode 775: reward = 200.00, steps = 200\n",
      "12:25:16 [DEBUG] train episode 776: reward = 23.00, steps = 23\n",
      "12:25:22 [DEBUG] train episode 777: reward = 146.00, steps = 146\n",
      "12:25:30 [DEBUG] train episode 778: reward = 177.00, steps = 177\n",
      "12:25:35 [DEBUG] train episode 779: reward = 111.00, steps = 111\n",
      "12:25:43 [DEBUG] train episode 780: reward = 170.00, steps = 170\n",
      "12:25:50 [DEBUG] train episode 781: reward = 155.00, steps = 155\n",
      "12:25:56 [DEBUG] train episode 782: reward = 128.00, steps = 128\n",
      "12:26:03 [DEBUG] train episode 783: reward = 165.00, steps = 165\n",
      "12:26:09 [DEBUG] train episode 784: reward = 129.00, steps = 129\n",
      "12:26:18 [DEBUG] train episode 785: reward = 200.00, steps = 200\n",
      "12:26:26 [DEBUG] train episode 786: reward = 182.00, steps = 182\n",
      "12:26:34 [DEBUG] train episode 787: reward = 168.00, steps = 168\n",
      "12:26:39 [DEBUG] train episode 788: reward = 118.00, steps = 118\n",
      "12:26:47 [DEBUG] train episode 789: reward = 162.00, steps = 162\n",
      "12:26:50 [DEBUG] train episode 790: reward = 63.00, steps = 63\n",
      "12:26:55 [DEBUG] train episode 791: reward = 112.00, steps = 112\n",
      "12:26:57 [DEBUG] train episode 792: reward = 39.00, steps = 39\n",
      "12:27:03 [DEBUG] train episode 793: reward = 143.00, steps = 143\n",
      "12:27:09 [DEBUG] train episode 794: reward = 131.00, steps = 131\n",
      "12:27:14 [DEBUG] train episode 795: reward = 115.00, steps = 115\n",
      "12:27:20 [DEBUG] train episode 796: reward = 134.00, steps = 134\n",
      "12:27:24 [DEBUG] train episode 797: reward = 72.00, steps = 72\n",
      "12:27:30 [DEBUG] train episode 798: reward = 145.00, steps = 145\n",
      "12:27:32 [DEBUG] train episode 799: reward = 43.00, steps = 43\n",
      "12:27:40 [DEBUG] train episode 800: reward = 177.00, steps = 177\n",
      "12:27:43 [DEBUG] train episode 801: reward = 50.00, steps = 50\n",
      "12:27:51 [DEBUG] train episode 802: reward = 196.00, steps = 196\n",
      "12:28:00 [DEBUG] train episode 803: reward = 200.00, steps = 200\n",
      "12:28:10 [DEBUG] train episode 804: reward = 200.00, steps = 200\n",
      "12:28:15 [DEBUG] train episode 805: reward = 110.00, steps = 110\n",
      "12:28:22 [DEBUG] train episode 806: reward = 167.00, steps = 167\n",
      "12:28:27 [DEBUG] train episode 807: reward = 98.00, steps = 98\n",
      "12:28:32 [DEBUG] train episode 808: reward = 116.00, steps = 116\n",
      "12:28:39 [DEBUG] train episode 809: reward = 146.00, steps = 146\n",
      "12:28:47 [DEBUG] train episode 810: reward = 194.00, steps = 194\n",
      "12:28:54 [DEBUG] train episode 811: reward = 141.00, steps = 141\n",
      "12:29:00 [DEBUG] train episode 812: reward = 147.00, steps = 147\n",
      "12:29:07 [DEBUG] train episode 813: reward = 155.00, steps = 155\n",
      "12:29:12 [DEBUG] train episode 814: reward = 107.00, steps = 107\n",
      "12:29:17 [DEBUG] train episode 815: reward = 109.00, steps = 109\n",
      "12:29:25 [DEBUG] train episode 816: reward = 160.00, steps = 160\n",
      "12:29:32 [DEBUG] train episode 817: reward = 165.00, steps = 165\n",
      "12:29:41 [DEBUG] train episode 818: reward = 194.00, steps = 194\n",
      "12:29:47 [DEBUG] train episode 819: reward = 135.00, steps = 135\n",
      "12:29:55 [DEBUG] train episode 820: reward = 183.00, steps = 183\n",
      "12:30:04 [DEBUG] train episode 821: reward = 200.00, steps = 200\n",
      "12:30:10 [DEBUG] train episode 822: reward = 120.00, steps = 120\n",
      "12:30:15 [DEBUG] train episode 823: reward = 115.00, steps = 115\n",
      "12:30:23 [DEBUG] train episode 824: reward = 178.00, steps = 178\n",
      "12:30:30 [DEBUG] train episode 825: reward = 147.00, steps = 147\n",
      "12:30:36 [DEBUG] train episode 826: reward = 133.00, steps = 133\n",
      "12:30:41 [DEBUG] train episode 827: reward = 125.00, steps = 125\n",
      "12:30:46 [DEBUG] train episode 828: reward = 106.00, steps = 106\n",
      "12:30:52 [DEBUG] train episode 829: reward = 136.00, steps = 136\n",
      "12:30:59 [DEBUG] train episode 830: reward = 137.00, steps = 137\n",
      "12:31:06 [DEBUG] train episode 831: reward = 168.00, steps = 168\n",
      "12:31:12 [DEBUG] train episode 832: reward = 123.00, steps = 123\n",
      "12:31:20 [DEBUG] train episode 833: reward = 186.00, steps = 186\n",
      "12:31:26 [DEBUG] train episode 834: reward = 130.00, steps = 130\n",
      "12:31:32 [DEBUG] train episode 835: reward = 126.00, steps = 126\n",
      "12:31:41 [DEBUG] train episode 836: reward = 200.00, steps = 200\n",
      "12:31:50 [DEBUG] train episode 837: reward = 200.00, steps = 200\n",
      "12:31:57 [DEBUG] train episode 838: reward = 151.00, steps = 151\n",
      "12:32:06 [DEBUG] train episode 839: reward = 200.00, steps = 200\n",
      "12:32:12 [DEBUG] train episode 840: reward = 141.00, steps = 141\n",
      "12:32:17 [DEBUG] train episode 841: reward = 108.00, steps = 108\n",
      "12:32:22 [DEBUG] train episode 842: reward = 105.00, steps = 105\n",
      "12:32:28 [DEBUG] train episode 843: reward = 135.00, steps = 135\n",
      "12:32:33 [DEBUG] train episode 844: reward = 109.00, steps = 109\n",
      "12:32:42 [DEBUG] train episode 845: reward = 198.00, steps = 198\n",
      "12:32:47 [DEBUG] train episode 846: reward = 105.00, steps = 105\n",
      "12:32:51 [DEBUG] train episode 847: reward = 103.00, steps = 103\n",
      "12:32:59 [DEBUG] train episode 848: reward = 163.00, steps = 163\n",
      "12:33:04 [DEBUG] train episode 849: reward = 115.00, steps = 115\n",
      "12:33:11 [DEBUG] train episode 850: reward = 154.00, steps = 154\n",
      "12:33:17 [DEBUG] train episode 851: reward = 137.00, steps = 137\n",
      "12:33:22 [DEBUG] train episode 852: reward = 99.00, steps = 99\n",
      "12:33:30 [DEBUG] train episode 853: reward = 171.00, steps = 171\n",
      "12:33:38 [DEBUG] train episode 854: reward = 184.00, steps = 184\n",
      "12:33:43 [DEBUG] train episode 855: reward = 121.00, steps = 121\n",
      "12:33:49 [DEBUG] train episode 856: reward = 132.00, steps = 132\n",
      "12:33:54 [DEBUG] train episode 857: reward = 111.00, steps = 111\n",
      "12:34:01 [DEBUG] train episode 858: reward = 154.00, steps = 154\n",
      "12:34:07 [DEBUG] train episode 859: reward = 131.00, steps = 131\n",
      "12:34:13 [DEBUG] train episode 860: reward = 124.00, steps = 124\n",
      "12:34:19 [DEBUG] train episode 861: reward = 128.00, steps = 128\n",
      "12:34:28 [DEBUG] train episode 862: reward = 200.00, steps = 200\n",
      "12:34:34 [DEBUG] train episode 863: reward = 137.00, steps = 137\n",
      "12:34:42 [DEBUG] train episode 864: reward = 183.00, steps = 183\n",
      "12:34:47 [DEBUG] train episode 865: reward = 101.00, steps = 101\n",
      "12:34:52 [DEBUG] train episode 866: reward = 115.00, steps = 115\n",
      "12:34:58 [DEBUG] train episode 867: reward = 123.00, steps = 123\n",
      "12:35:03 [DEBUG] train episode 868: reward = 110.00, steps = 110\n",
      "12:35:11 [DEBUG] train episode 869: reward = 171.00, steps = 171\n",
      "12:35:16 [DEBUG] train episode 870: reward = 123.00, steps = 123\n",
      "12:35:22 [DEBUG] train episode 871: reward = 113.00, steps = 113\n",
      "12:35:29 [DEBUG] train episode 872: reward = 169.00, steps = 169\n",
      "12:35:36 [DEBUG] train episode 873: reward = 148.00, steps = 148\n",
      "12:35:42 [DEBUG] train episode 874: reward = 132.00, steps = 132\n",
      "12:35:47 [DEBUG] train episode 875: reward = 111.00, steps = 111\n",
      "12:35:54 [DEBUG] train episode 876: reward = 162.00, steps = 162\n",
      "12:36:03 [DEBUG] train episode 877: reward = 200.00, steps = 200\n",
      "12:36:11 [DEBUG] train episode 878: reward = 163.00, steps = 163\n",
      "12:36:18 [DEBUG] train episode 879: reward = 153.00, steps = 153\n",
      "12:36:25 [DEBUG] train episode 880: reward = 162.00, steps = 162\n",
      "12:36:31 [DEBUG] train episode 881: reward = 141.00, steps = 141\n",
      "12:36:37 [DEBUG] train episode 882: reward = 114.00, steps = 114\n",
      "12:36:46 [DEBUG] train episode 883: reward = 200.00, steps = 200\n",
      "12:36:54 [DEBUG] train episode 884: reward = 174.00, steps = 174\n",
      "12:36:59 [DEBUG] train episode 885: reward = 109.00, steps = 109\n",
      "12:37:05 [DEBUG] train episode 886: reward = 131.00, steps = 131\n",
      "12:37:10 [DEBUG] train episode 887: reward = 112.00, steps = 112\n",
      "12:37:20 [DEBUG] train episode 888: reward = 200.00, steps = 200\n",
      "12:37:26 [DEBUG] train episode 889: reward = 152.00, steps = 152\n",
      "12:37:33 [DEBUG] train episode 890: reward = 140.00, steps = 140\n",
      "12:37:39 [DEBUG] train episode 891: reward = 131.00, steps = 131\n",
      "12:37:45 [DEBUG] train episode 892: reward = 141.00, steps = 141\n",
      "12:37:51 [DEBUG] train episode 893: reward = 126.00, steps = 126\n",
      "12:37:55 [DEBUG] train episode 894: reward = 97.00, steps = 97\n",
      "12:38:02 [DEBUG] train episode 895: reward = 152.00, steps = 152\n",
      "12:38:09 [DEBUG] train episode 896: reward = 148.00, steps = 148\n",
      "12:38:14 [DEBUG] train episode 897: reward = 109.00, steps = 109\n",
      "12:38:23 [DEBUG] train episode 898: reward = 200.00, steps = 200\n",
      "12:38:28 [DEBUG] train episode 899: reward = 101.00, steps = 101\n",
      "12:38:34 [DEBUG] train episode 900: reward = 140.00, steps = 140\n",
      "12:38:40 [DEBUG] train episode 901: reward = 134.00, steps = 134\n",
      "12:38:47 [DEBUG] train episode 902: reward = 166.00, steps = 166\n",
      "12:38:56 [DEBUG] train episode 903: reward = 200.00, steps = 200\n",
      "12:39:03 [DEBUG] train episode 904: reward = 152.00, steps = 152\n",
      "12:39:10 [DEBUG] train episode 905: reward = 149.00, steps = 149\n",
      "12:39:15 [DEBUG] train episode 906: reward = 110.00, steps = 110\n",
      "12:39:20 [DEBUG] train episode 907: reward = 111.00, steps = 111\n",
      "12:39:27 [DEBUG] train episode 908: reward = 151.00, steps = 151\n",
      "12:39:34 [DEBUG] train episode 909: reward = 149.00, steps = 149\n",
      "12:39:41 [DEBUG] train episode 910: reward = 151.00, steps = 151\n",
      "12:39:46 [DEBUG] train episode 911: reward = 110.00, steps = 110\n",
      "12:39:53 [DEBUG] train episode 912: reward = 165.00, steps = 165\n",
      "12:40:01 [DEBUG] train episode 913: reward = 165.00, steps = 165\n",
      "12:40:09 [DEBUG] train episode 914: reward = 183.00, steps = 183\n",
      "12:40:16 [DEBUG] train episode 915: reward = 147.00, steps = 147\n",
      "12:40:23 [DEBUG] train episode 916: reward = 164.00, steps = 164\n",
      "12:40:30 [DEBUG] train episode 917: reward = 144.00, steps = 144\n",
      "12:40:39 [DEBUG] train episode 918: reward = 197.00, steps = 197\n",
      "12:40:45 [DEBUG] train episode 919: reward = 118.00, steps = 118\n",
      "12:40:52 [DEBUG] train episode 920: reward = 165.00, steps = 165\n",
      "12:40:59 [DEBUG] train episode 921: reward = 140.00, steps = 140\n",
      "12:41:04 [DEBUG] train episode 922: reward = 116.00, steps = 116\n",
      "12:41:09 [DEBUG] train episode 923: reward = 117.00, steps = 117\n",
      "12:41:15 [DEBUG] train episode 924: reward = 132.00, steps = 132\n",
      "12:41:20 [DEBUG] train episode 925: reward = 94.00, steps = 94\n",
      "12:41:25 [DEBUG] train episode 926: reward = 122.00, steps = 122\n",
      "12:41:32 [DEBUG] train episode 927: reward = 150.00, steps = 150\n",
      "12:41:40 [DEBUG] train episode 928: reward = 172.00, steps = 172\n",
      "12:41:46 [DEBUG] train episode 929: reward = 126.00, steps = 126\n",
      "12:41:51 [DEBUG] train episode 930: reward = 130.00, steps = 130\n",
      "12:41:58 [DEBUG] train episode 931: reward = 140.00, steps = 140\n",
      "12:42:03 [DEBUG] train episode 932: reward = 107.00, steps = 107\n",
      "12:42:12 [DEBUG] train episode 933: reward = 200.00, steps = 200\n",
      "12:42:18 [DEBUG] train episode 934: reward = 129.00, steps = 129\n",
      "12:42:27 [DEBUG] train episode 935: reward = 200.00, steps = 200\n",
      "12:42:33 [DEBUG] train episode 936: reward = 148.00, steps = 148\n",
      "12:42:41 [DEBUG] train episode 937: reward = 164.00, steps = 164\n",
      "12:42:47 [DEBUG] train episode 938: reward = 132.00, steps = 132\n",
      "12:42:52 [DEBUG] train episode 939: reward = 108.00, steps = 108\n",
      "12:42:59 [DEBUG] train episode 940: reward = 149.00, steps = 149\n",
      "12:43:04 [DEBUG] train episode 941: reward = 121.00, steps = 121\n",
      "12:43:11 [DEBUG] train episode 942: reward = 148.00, steps = 148\n",
      "12:43:17 [DEBUG] train episode 943: reward = 138.00, steps = 138\n",
      "12:43:26 [DEBUG] train episode 944: reward = 200.00, steps = 200\n",
      "12:43:31 [DEBUG] train episode 945: reward = 112.00, steps = 112\n",
      "12:43:37 [DEBUG] train episode 946: reward = 123.00, steps = 123\n",
      "12:43:46 [DEBUG] train episode 947: reward = 200.00, steps = 200\n",
      "12:43:52 [DEBUG] train episode 948: reward = 140.00, steps = 140\n",
      "12:44:01 [DEBUG] train episode 949: reward = 185.00, steps = 185\n",
      "12:44:06 [DEBUG] train episode 950: reward = 112.00, steps = 112\n",
      "12:44:11 [DEBUG] train episode 951: reward = 127.00, steps = 127\n",
      "12:44:18 [DEBUG] train episode 952: reward = 154.00, steps = 154\n",
      "12:44:23 [DEBUG] train episode 953: reward = 100.00, steps = 100\n",
      "12:44:30 [DEBUG] train episode 954: reward = 154.00, steps = 154\n",
      "12:44:38 [DEBUG] train episode 955: reward = 178.00, steps = 178\n",
      "12:44:46 [DEBUG] train episode 956: reward = 163.00, steps = 163\n",
      "12:44:51 [DEBUG] train episode 957: reward = 104.00, steps = 104\n",
      "12:45:00 [DEBUG] train episode 958: reward = 200.00, steps = 200\n",
      "12:45:09 [DEBUG] train episode 959: reward = 196.00, steps = 196\n",
      "12:45:17 [DEBUG] train episode 960: reward = 177.00, steps = 177\n",
      "12:45:22 [DEBUG] train episode 961: reward = 119.00, steps = 119\n",
      "12:45:29 [DEBUG] train episode 962: reward = 148.00, steps = 148\n",
      "12:45:35 [DEBUG] train episode 963: reward = 122.00, steps = 122\n",
      "12:45:40 [DEBUG] train episode 964: reward = 113.00, steps = 113\n",
      "12:45:48 [DEBUG] train episode 965: reward = 175.00, steps = 175\n",
      "12:45:54 [DEBUG] train episode 966: reward = 147.00, steps = 147\n",
      "12:46:01 [DEBUG] train episode 967: reward = 147.00, steps = 147\n",
      "12:46:07 [DEBUG] train episode 968: reward = 130.00, steps = 130\n",
      "12:46:14 [DEBUG] train episode 969: reward = 162.00, steps = 162\n",
      "12:46:22 [DEBUG] train episode 970: reward = 179.00, steps = 179\n",
      "12:46:28 [DEBUG] train episode 971: reward = 130.00, steps = 130\n",
      "12:46:37 [DEBUG] train episode 972: reward = 199.00, steps = 199\n",
      "12:46:46 [DEBUG] train episode 973: reward = 200.00, steps = 200\n",
      "12:46:51 [DEBUG] train episode 974: reward = 109.00, steps = 109\n",
      "12:47:00 [DEBUG] train episode 975: reward = 193.00, steps = 193\n",
      "12:47:04 [DEBUG] train episode 976: reward = 97.00, steps = 97\n",
      "12:47:12 [DEBUG] train episode 977: reward = 165.00, steps = 165\n",
      "12:47:17 [DEBUG] train episode 978: reward = 115.00, steps = 115\n",
      "12:47:24 [DEBUG] train episode 979: reward = 159.00, steps = 159\n",
      "12:47:31 [DEBUG] train episode 980: reward = 139.00, steps = 139\n",
      "12:47:38 [DEBUG] train episode 981: reward = 173.00, steps = 173\n",
      "12:47:47 [DEBUG] train episode 982: reward = 195.00, steps = 195\n",
      "12:47:53 [DEBUG] train episode 983: reward = 129.00, steps = 129\n",
      "12:48:01 [DEBUG] train episode 984: reward = 171.00, steps = 171\n",
      "12:48:08 [DEBUG] train episode 985: reward = 157.00, steps = 157\n",
      "12:48:13 [DEBUG] train episode 986: reward = 110.00, steps = 110\n",
      "12:48:18 [DEBUG] train episode 987: reward = 113.00, steps = 113\n",
      "12:48:22 [DEBUG] train episode 988: reward = 90.00, steps = 90\n",
      "12:48:28 [DEBUG] train episode 989: reward = 125.00, steps = 125\n",
      "12:48:33 [DEBUG] train episode 990: reward = 104.00, steps = 104\n",
      "12:48:38 [DEBUG] train episode 991: reward = 119.00, steps = 119\n",
      "12:48:44 [DEBUG] train episode 992: reward = 127.00, steps = 127\n",
      "12:48:51 [DEBUG] train episode 993: reward = 157.00, steps = 157\n",
      "12:48:57 [DEBUG] train episode 994: reward = 116.00, steps = 116\n",
      "12:49:01 [DEBUG] train episode 995: reward = 104.00, steps = 104\n",
      "12:49:05 [DEBUG] train episode 996: reward = 88.00, steps = 88\n",
      "12:49:13 [DEBUG] train episode 997: reward = 179.00, steps = 179\n",
      "12:49:21 [DEBUG] train episode 998: reward = 159.00, steps = 159\n",
      "12:49:26 [DEBUG] train episode 999: reward = 125.00, steps = 125\n",
      "12:49:31 [DEBUG] train episode 1000: reward = 98.00, steps = 98\n",
      "12:49:35 [DEBUG] train episode 1001: reward = 96.00, steps = 96\n",
      "12:49:41 [DEBUG] train episode 1002: reward = 123.00, steps = 123\n",
      "12:49:45 [DEBUG] train episode 1003: reward = 93.00, steps = 93\n",
      "12:49:50 [DEBUG] train episode 1004: reward = 111.00, steps = 111\n",
      "12:49:57 [DEBUG] train episode 1005: reward = 151.00, steps = 151\n",
      "12:50:02 [DEBUG] train episode 1006: reward = 111.00, steps = 111\n",
      "12:50:10 [DEBUG] train episode 1007: reward = 170.00, steps = 170\n",
      "12:50:14 [DEBUG] train episode 1008: reward = 95.00, steps = 95\n",
      "12:50:19 [DEBUG] train episode 1009: reward = 105.00, steps = 105\n",
      "12:50:25 [DEBUG] train episode 1010: reward = 130.00, steps = 130\n",
      "12:50:30 [DEBUG] train episode 1011: reward = 101.00, steps = 101\n",
      "12:50:36 [DEBUG] train episode 1012: reward = 130.00, steps = 130\n",
      "12:50:39 [DEBUG] train episode 1013: reward = 83.00, steps = 83\n",
      "12:50:45 [DEBUG] train episode 1014: reward = 127.00, steps = 127\n",
      "12:50:53 [DEBUG] train episode 1015: reward = 163.00, steps = 163\n",
      "12:51:00 [DEBUG] train episode 1016: reward = 166.00, steps = 166\n",
      "12:51:06 [DEBUG] train episode 1017: reward = 130.00, steps = 130\n",
      "12:51:10 [DEBUG] train episode 1018: reward = 95.00, steps = 95\n",
      "12:51:16 [DEBUG] train episode 1019: reward = 134.00, steps = 134\n",
      "12:51:23 [DEBUG] train episode 1020: reward = 139.00, steps = 139\n",
      "12:51:27 [DEBUG] train episode 1021: reward = 96.00, steps = 96\n",
      "12:51:34 [DEBUG] train episode 1022: reward = 155.00, steps = 155\n",
      "12:51:39 [DEBUG] train episode 1023: reward = 107.00, steps = 107\n",
      "12:51:44 [DEBUG] train episode 1024: reward = 107.00, steps = 107\n",
      "12:51:49 [DEBUG] train episode 1025: reward = 105.00, steps = 105\n",
      "12:51:56 [DEBUG] train episode 1026: reward = 147.00, steps = 147\n",
      "12:52:01 [DEBUG] train episode 1027: reward = 125.00, steps = 125\n",
      "12:52:06 [DEBUG] train episode 1028: reward = 108.00, steps = 108\n",
      "12:52:11 [DEBUG] train episode 1029: reward = 108.00, steps = 108\n",
      "12:52:17 [DEBUG] train episode 1030: reward = 129.00, steps = 129\n",
      "12:52:22 [DEBUG] train episode 1031: reward = 113.00, steps = 113\n",
      "12:52:26 [DEBUG] train episode 1032: reward = 80.00, steps = 80\n",
      "12:52:31 [DEBUG] train episode 1033: reward = 106.00, steps = 106\n",
      "12:52:35 [DEBUG] train episode 1034: reward = 91.00, steps = 91\n",
      "12:52:40 [DEBUG] train episode 1035: reward = 106.00, steps = 106\n",
      "12:52:45 [DEBUG] train episode 1036: reward = 121.00, steps = 121\n",
      "12:52:49 [DEBUG] train episode 1037: reward = 91.00, steps = 91\n",
      "12:52:55 [DEBUG] train episode 1038: reward = 114.00, steps = 114\n",
      "12:52:59 [DEBUG] train episode 1039: reward = 103.00, steps = 103\n",
      "12:53:05 [DEBUG] train episode 1040: reward = 111.00, steps = 111\n",
      "12:53:11 [DEBUG] train episode 1041: reward = 154.00, steps = 154\n",
      "12:53:16 [DEBUG] train episode 1042: reward = 93.00, steps = 93\n",
      "12:53:22 [DEBUG] train episode 1043: reward = 129.00, steps = 129\n",
      "12:53:31 [DEBUG] train episode 1044: reward = 200.00, steps = 200\n",
      "12:53:37 [DEBUG] train episode 1045: reward = 128.00, steps = 128\n",
      "12:53:42 [DEBUG] train episode 1046: reward = 121.00, steps = 121\n",
      "12:53:51 [DEBUG] train episode 1047: reward = 200.00, steps = 200\n",
      "12:53:57 [DEBUG] train episode 1048: reward = 130.00, steps = 130\n",
      "12:54:01 [DEBUG] train episode 1049: reward = 93.00, steps = 93\n",
      "12:54:06 [DEBUG] train episode 1050: reward = 94.00, steps = 94\n",
      "12:54:11 [DEBUG] train episode 1051: reward = 118.00, steps = 118\n",
      "12:54:19 [DEBUG] train episode 1052: reward = 171.00, steps = 171\n",
      "12:54:23 [DEBUG] train episode 1053: reward = 101.00, steps = 101\n",
      "12:54:28 [DEBUG] train episode 1054: reward = 95.00, steps = 95\n",
      "12:54:36 [DEBUG] train episode 1055: reward = 180.00, steps = 180\n",
      "12:54:40 [DEBUG] train episode 1056: reward = 98.00, steps = 98\n",
      "12:54:45 [DEBUG] train episode 1057: reward = 109.00, steps = 109\n",
      "12:54:51 [DEBUG] train episode 1058: reward = 122.00, steps = 122\n",
      "12:54:56 [DEBUG] train episode 1059: reward = 100.00, steps = 100\n",
      "12:55:02 [DEBUG] train episode 1060: reward = 142.00, steps = 142\n",
      "12:55:06 [DEBUG] train episode 1061: reward = 96.00, steps = 96\n",
      "12:55:12 [DEBUG] train episode 1062: reward = 130.00, steps = 130\n",
      "12:55:18 [DEBUG] train episode 1063: reward = 126.00, steps = 126\n",
      "12:55:28 [DEBUG] train episode 1064: reward = 200.00, steps = 200\n",
      "12:55:34 [DEBUG] train episode 1065: reward = 132.00, steps = 132\n",
      "12:55:38 [DEBUG] train episode 1066: reward = 98.00, steps = 98\n",
      "12:55:43 [DEBUG] train episode 1067: reward = 90.00, steps = 90\n",
      "12:55:48 [DEBUG] train episode 1068: reward = 128.00, steps = 128\n",
      "12:55:57 [DEBUG] train episode 1069: reward = 182.00, steps = 182\n",
      "12:56:01 [DEBUG] train episode 1070: reward = 105.00, steps = 105\n",
      "12:56:07 [DEBUG] train episode 1071: reward = 131.00, steps = 131\n",
      "12:56:12 [DEBUG] train episode 1072: reward = 100.00, steps = 100\n",
      "12:56:20 [DEBUG] train episode 1073: reward = 186.00, steps = 186\n",
      "12:56:29 [DEBUG] train episode 1074: reward = 193.00, steps = 193\n",
      "12:56:34 [DEBUG] train episode 1075: reward = 100.00, steps = 100\n",
      "12:56:43 [DEBUG] train episode 1076: reward = 200.00, steps = 200\n",
      "12:56:49 [DEBUG] train episode 1077: reward = 146.00, steps = 146\n",
      "12:56:58 [DEBUG] train episode 1078: reward = 200.00, steps = 200\n",
      "12:57:04 [DEBUG] train episode 1079: reward = 116.00, steps = 116\n",
      "12:57:10 [DEBUG] train episode 1080: reward = 131.00, steps = 131\n",
      "12:57:16 [DEBUG] train episode 1081: reward = 144.00, steps = 144\n",
      "12:57:22 [DEBUG] train episode 1082: reward = 118.00, steps = 118\n",
      "12:57:28 [DEBUG] train episode 1083: reward = 130.00, steps = 130\n",
      "12:57:37 [DEBUG] train episode 1084: reward = 200.00, steps = 200\n",
      "12:57:43 [DEBUG] train episode 1085: reward = 154.00, steps = 154\n",
      "12:57:50 [DEBUG] train episode 1086: reward = 155.00, steps = 155\n",
      "12:57:59 [DEBUG] train episode 1087: reward = 179.00, steps = 179\n",
      "12:58:04 [DEBUG] train episode 1088: reward = 119.00, steps = 119\n",
      "12:58:08 [DEBUG] train episode 1089: reward = 93.00, steps = 93\n",
      "12:58:13 [DEBUG] train episode 1090: reward = 111.00, steps = 111\n",
      "12:58:18 [DEBUG] train episode 1091: reward = 101.00, steps = 101\n",
      "12:58:23 [DEBUG] train episode 1092: reward = 97.00, steps = 97\n",
      "12:58:27 [DEBUG] train episode 1093: reward = 102.00, steps = 102\n",
      "12:58:33 [DEBUG] train episode 1094: reward = 133.00, steps = 133\n",
      "12:58:37 [DEBUG] train episode 1095: reward = 88.00, steps = 88\n",
      "12:58:43 [DEBUG] train episode 1096: reward = 126.00, steps = 126\n",
      "12:58:50 [DEBUG] train episode 1097: reward = 154.00, steps = 154\n",
      "12:58:56 [DEBUG] train episode 1098: reward = 142.00, steps = 142\n",
      "12:59:02 [DEBUG] train episode 1099: reward = 115.00, steps = 115\n",
      "12:59:06 [DEBUG] train episode 1100: reward = 95.00, steps = 95\n",
      "12:59:12 [DEBUG] train episode 1101: reward = 123.00, steps = 123\n",
      "12:59:17 [DEBUG] train episode 1102: reward = 119.00, steps = 119\n",
      "12:59:22 [DEBUG] train episode 1103: reward = 119.00, steps = 119\n",
      "12:59:29 [DEBUG] train episode 1104: reward = 140.00, steps = 140\n",
      "12:59:34 [DEBUG] train episode 1105: reward = 119.00, steps = 119\n",
      "12:59:39 [DEBUG] train episode 1106: reward = 112.00, steps = 112\n",
      "12:59:47 [DEBUG] train episode 1107: reward = 171.00, steps = 171\n",
      "12:59:53 [DEBUG] train episode 1108: reward = 141.00, steps = 141\n",
      "12:59:58 [DEBUG] train episode 1109: reward = 109.00, steps = 109\n",
      "13:00:04 [DEBUG] train episode 1110: reward = 130.00, steps = 130\n",
      "13:00:08 [DEBUG] train episode 1111: reward = 84.00, steps = 84\n",
      "13:00:14 [DEBUG] train episode 1112: reward = 121.00, steps = 121\n",
      "13:00:20 [DEBUG] train episode 1113: reward = 143.00, steps = 143\n",
      "13:00:25 [DEBUG] train episode 1114: reward = 114.00, steps = 114\n",
      "13:00:30 [DEBUG] train episode 1115: reward = 98.00, steps = 98\n",
      "13:00:39 [DEBUG] train episode 1116: reward = 194.00, steps = 194\n",
      "13:00:43 [DEBUG] train episode 1117: reward = 99.00, steps = 99\n",
      "13:00:49 [DEBUG] train episode 1118: reward = 134.00, steps = 134\n",
      "13:00:55 [DEBUG] train episode 1119: reward = 130.00, steps = 130\n",
      "13:01:00 [DEBUG] train episode 1120: reward = 95.00, steps = 95\n",
      "13:01:04 [DEBUG] train episode 1121: reward = 101.00, steps = 101\n",
      "13:01:11 [DEBUG] train episode 1122: reward = 146.00, steps = 146\n",
      "13:01:19 [DEBUG] train episode 1123: reward = 183.00, steps = 183\n",
      "13:01:27 [DEBUG] train episode 1124: reward = 181.00, steps = 181\n",
      "13:01:33 [DEBUG] train episode 1125: reward = 128.00, steps = 128\n",
      "13:01:39 [DEBUG] train episode 1126: reward = 138.00, steps = 138\n",
      "13:01:45 [DEBUG] train episode 1127: reward = 111.00, steps = 111\n",
      "13:01:53 [DEBUG] train episode 1128: reward = 189.00, steps = 189\n",
      "13:01:59 [DEBUG] train episode 1129: reward = 124.00, steps = 124\n",
      "13:02:03 [DEBUG] train episode 1130: reward = 94.00, steps = 94\n",
      "13:02:09 [DEBUG] train episode 1131: reward = 122.00, steps = 122\n",
      "13:02:18 [DEBUG] train episode 1132: reward = 200.00, steps = 200\n",
      "13:02:22 [DEBUG] train episode 1133: reward = 102.00, steps = 102\n",
      "13:02:31 [DEBUG] train episode 1134: reward = 200.00, steps = 200\n",
      "13:02:38 [DEBUG] train episode 1135: reward = 139.00, steps = 139\n",
      "13:02:43 [DEBUG] train episode 1136: reward = 122.00, steps = 122\n",
      "13:02:48 [DEBUG] train episode 1137: reward = 106.00, steps = 106\n",
      "13:02:53 [DEBUG] train episode 1138: reward = 101.00, steps = 101\n",
      "13:02:58 [DEBUG] train episode 1139: reward = 110.00, steps = 110\n",
      "13:03:06 [DEBUG] train episode 1140: reward = 168.00, steps = 168\n",
      "13:03:15 [DEBUG] train episode 1141: reward = 200.00, steps = 200\n",
      "13:03:20 [DEBUG] train episode 1142: reward = 123.00, steps = 123\n",
      "13:03:26 [DEBUG] train episode 1143: reward = 138.00, steps = 138\n",
      "13:03:32 [DEBUG] train episode 1144: reward = 116.00, steps = 116\n",
      "13:03:39 [DEBUG] train episode 1145: reward = 162.00, steps = 162\n",
      "13:03:45 [DEBUG] train episode 1146: reward = 137.00, steps = 137\n",
      "13:03:53 [DEBUG] train episode 1147: reward = 173.00, steps = 173\n",
      "13:03:58 [DEBUG] train episode 1148: reward = 116.00, steps = 116\n",
      "13:04:03 [DEBUG] train episode 1149: reward = 106.00, steps = 106\n",
      "13:04:08 [DEBUG] train episode 1150: reward = 110.00, steps = 110\n",
      "13:04:13 [DEBUG] train episode 1151: reward = 110.00, steps = 110\n",
      "13:04:21 [DEBUG] train episode 1152: reward = 158.00, steps = 158\n",
      "13:04:28 [DEBUG] train episode 1153: reward = 164.00, steps = 164\n",
      "13:04:36 [DEBUG] train episode 1154: reward = 175.00, steps = 175\n",
      "13:04:40 [DEBUG] train episode 1155: reward = 88.00, steps = 88\n",
      "13:04:45 [DEBUG] train episode 1156: reward = 111.00, steps = 111\n",
      "13:04:54 [DEBUG] train episode 1157: reward = 182.00, steps = 182\n",
      "13:04:59 [DEBUG] train episode 1158: reward = 120.00, steps = 120\n",
      "13:05:03 [DEBUG] train episode 1159: reward = 89.00, steps = 89\n",
      "13:05:08 [DEBUG] train episode 1160: reward = 118.00, steps = 118\n",
      "13:05:15 [DEBUG] train episode 1161: reward = 155.00, steps = 155\n",
      "13:05:19 [DEBUG] train episode 1162: reward = 82.00, steps = 82\n",
      "13:05:24 [DEBUG] train episode 1163: reward = 111.00, steps = 111\n",
      "13:05:32 [DEBUG] train episode 1164: reward = 173.00, steps = 173\n",
      "13:05:37 [DEBUG] train episode 1165: reward = 101.00, steps = 101\n",
      "13:05:43 [DEBUG] train episode 1166: reward = 128.00, steps = 128\n",
      "13:05:49 [DEBUG] train episode 1167: reward = 133.00, steps = 133\n",
      "13:05:55 [DEBUG] train episode 1168: reward = 136.00, steps = 136\n",
      "13:06:01 [DEBUG] train episode 1169: reward = 137.00, steps = 137\n",
      "13:06:05 [DEBUG] train episode 1170: reward = 94.00, steps = 94\n",
      "13:06:13 [DEBUG] train episode 1171: reward = 163.00, steps = 163\n",
      "13:06:17 [DEBUG] train episode 1172: reward = 102.00, steps = 102\n",
      "13:06:25 [DEBUG] train episode 1173: reward = 173.00, steps = 173\n",
      "13:06:34 [DEBUG] train episode 1174: reward = 200.00, steps = 200\n",
      "13:06:42 [DEBUG] train episode 1175: reward = 180.00, steps = 180\n",
      "13:06:48 [DEBUG] train episode 1176: reward = 125.00, steps = 125\n",
      "13:06:53 [DEBUG] train episode 1177: reward = 104.00, steps = 104\n",
      "13:06:59 [DEBUG] train episode 1178: reward = 123.00, steps = 123\n",
      "13:07:03 [DEBUG] train episode 1179: reward = 102.00, steps = 102\n",
      "13:07:08 [DEBUG] train episode 1180: reward = 105.00, steps = 105\n",
      "13:07:12 [DEBUG] train episode 1181: reward = 89.00, steps = 89\n",
      "13:07:17 [DEBUG] train episode 1182: reward = 101.00, steps = 101\n",
      "13:07:22 [DEBUG] train episode 1183: reward = 106.00, steps = 106\n",
      "13:07:26 [DEBUG] train episode 1184: reward = 98.00, steps = 98\n",
      "13:07:32 [DEBUG] train episode 1185: reward = 119.00, steps = 119\n",
      "13:07:37 [DEBUG] train episode 1186: reward = 106.00, steps = 106\n",
      "13:07:42 [DEBUG] train episode 1187: reward = 114.00, steps = 114\n",
      "13:07:47 [DEBUG] train episode 1188: reward = 95.00, steps = 95\n",
      "13:07:53 [DEBUG] train episode 1189: reward = 127.00, steps = 127\n",
      "13:07:57 [DEBUG] train episode 1190: reward = 101.00, steps = 101\n",
      "13:08:02 [DEBUG] train episode 1191: reward = 106.00, steps = 106\n",
      "13:08:08 [DEBUG] train episode 1192: reward = 120.00, steps = 120\n",
      "13:08:16 [DEBUG] train episode 1193: reward = 182.00, steps = 182\n",
      "13:08:22 [DEBUG] train episode 1194: reward = 125.00, steps = 125\n",
      "13:08:27 [DEBUG] train episode 1195: reward = 127.00, steps = 127\n",
      "13:08:34 [DEBUG] train episode 1196: reward = 125.00, steps = 125\n",
      "13:08:40 [DEBUG] train episode 1197: reward = 128.00, steps = 128\n",
      "13:08:44 [DEBUG] train episode 1198: reward = 93.00, steps = 93\n",
      "13:08:49 [DEBUG] train episode 1199: reward = 116.00, steps = 116\n",
      "13:08:57 [DEBUG] train episode 1200: reward = 155.00, steps = 155\n",
      "13:09:02 [DEBUG] train episode 1201: reward = 114.00, steps = 114\n",
      "13:09:10 [DEBUG] train episode 1202: reward = 153.00, steps = 153\n",
      "13:09:17 [DEBUG] train episode 1203: reward = 135.00, steps = 135\n",
      "13:09:27 [DEBUG] train episode 1204: reward = 200.00, steps = 200\n",
      "13:09:32 [DEBUG] train episode 1205: reward = 93.00, steps = 93\n",
      "13:09:37 [DEBUG] train episode 1206: reward = 96.00, steps = 96\n",
      "13:09:43 [DEBUG] train episode 1207: reward = 128.00, steps = 128\n",
      "13:09:48 [DEBUG] train episode 1208: reward = 97.00, steps = 97\n",
      "13:09:55 [DEBUG] train episode 1209: reward = 133.00, steps = 133\n",
      "13:10:03 [DEBUG] train episode 1210: reward = 166.00, steps = 166\n",
      "13:10:08 [DEBUG] train episode 1211: reward = 92.00, steps = 92\n",
      "13:10:14 [DEBUG] train episode 1212: reward = 113.00, steps = 113\n",
      "13:10:20 [DEBUG] train episode 1213: reward = 107.00, steps = 107\n",
      "13:10:27 [DEBUG] train episode 1214: reward = 126.00, steps = 126\n",
      "13:10:34 [DEBUG] train episode 1215: reward = 136.00, steps = 136\n",
      "13:10:41 [DEBUG] train episode 1216: reward = 127.00, steps = 127\n",
      "13:10:49 [DEBUG] train episode 1217: reward = 162.00, steps = 162\n",
      "13:10:54 [DEBUG] train episode 1218: reward = 93.00, steps = 93\n",
      "13:10:59 [DEBUG] train episode 1219: reward = 99.00, steps = 99\n",
      "13:11:05 [DEBUG] train episode 1220: reward = 97.00, steps = 97\n",
      "13:11:13 [DEBUG] train episode 1221: reward = 124.00, steps = 124\n",
      "13:11:22 [DEBUG] train episode 1222: reward = 153.00, steps = 153\n",
      "13:11:33 [DEBUG] train episode 1223: reward = 200.00, steps = 200\n",
      "13:11:39 [DEBUG] train episode 1224: reward = 109.00, steps = 109\n",
      "13:11:46 [DEBUG] train episode 1225: reward = 129.00, steps = 129\n",
      "13:11:56 [DEBUG] train episode 1226: reward = 133.00, steps = 133\n",
      "13:12:03 [DEBUG] train episode 1227: reward = 116.00, steps = 116\n",
      "13:12:11 [DEBUG] train episode 1228: reward = 136.00, steps = 136\n",
      "13:12:18 [DEBUG] train episode 1229: reward = 122.00, steps = 122\n",
      "13:12:27 [DEBUG] train episode 1230: reward = 166.00, steps = 166\n",
      "13:12:34 [DEBUG] train episode 1231: reward = 128.00, steps = 128\n",
      "13:12:40 [DEBUG] train episode 1232: reward = 103.00, steps = 103\n",
      "13:12:46 [DEBUG] train episode 1233: reward = 103.00, steps = 103\n",
      "13:12:53 [DEBUG] train episode 1234: reward = 124.00, steps = 124\n",
      "13:13:02 [DEBUG] train episode 1235: reward = 147.00, steps = 147\n",
      "13:13:10 [DEBUG] train episode 1236: reward = 141.00, steps = 141\n",
      "13:13:17 [DEBUG] train episode 1237: reward = 128.00, steps = 128\n",
      "13:13:25 [DEBUG] train episode 1238: reward = 131.00, steps = 131\n",
      "13:13:31 [DEBUG] train episode 1239: reward = 107.00, steps = 107\n",
      "13:13:38 [DEBUG] train episode 1240: reward = 125.00, steps = 125\n",
      "13:13:47 [DEBUG] train episode 1241: reward = 160.00, steps = 160\n",
      "13:13:54 [DEBUG] train episode 1242: reward = 119.00, steps = 119\n",
      "13:14:00 [DEBUG] train episode 1243: reward = 107.00, steps = 107\n",
      "13:14:06 [DEBUG] train episode 1244: reward = 97.00, steps = 97\n",
      "13:14:12 [DEBUG] train episode 1245: reward = 112.00, steps = 112\n",
      "13:14:18 [DEBUG] train episode 1246: reward = 107.00, steps = 107\n",
      "13:14:27 [DEBUG] train episode 1247: reward = 151.00, steps = 151\n",
      "13:14:34 [DEBUG] train episode 1248: reward = 124.00, steps = 124\n",
      "13:14:44 [DEBUG] train episode 1249: reward = 172.00, steps = 172\n",
      "13:14:51 [DEBUG] train episode 1250: reward = 131.00, steps = 131\n",
      "13:14:58 [DEBUG] train episode 1251: reward = 115.00, steps = 115\n",
      "13:15:05 [DEBUG] train episode 1252: reward = 127.00, steps = 127\n",
      "13:15:11 [DEBUG] train episode 1253: reward = 119.00, steps = 119\n",
      "13:15:17 [DEBUG] train episode 1254: reward = 86.00, steps = 86\n",
      "13:15:23 [DEBUG] train episode 1255: reward = 120.00, steps = 120\n",
      "13:15:30 [DEBUG] train episode 1256: reward = 119.00, steps = 119\n",
      "13:15:39 [DEBUG] train episode 1257: reward = 146.00, steps = 146\n",
      "13:15:45 [DEBUG] train episode 1258: reward = 104.00, steps = 104\n",
      "13:15:53 [DEBUG] train episode 1259: reward = 146.00, steps = 146\n",
      "13:16:01 [DEBUG] train episode 1260: reward = 139.00, steps = 139\n",
      "13:16:10 [DEBUG] train episode 1261: reward = 167.00, steps = 167\n",
      "13:16:17 [DEBUG] train episode 1262: reward = 123.00, steps = 123\n",
      "13:16:24 [DEBUG] train episode 1263: reward = 127.00, steps = 127\n",
      "13:16:35 [DEBUG] train episode 1264: reward = 200.00, steps = 200\n",
      "13:16:47 [DEBUG] train episode 1265: reward = 197.00, steps = 197\n",
      "13:16:53 [DEBUG] train episode 1266: reward = 111.00, steps = 111\n",
      "13:17:00 [DEBUG] train episode 1267: reward = 122.00, steps = 122\n",
      "13:17:06 [DEBUG] train episode 1268: reward = 109.00, steps = 109\n",
      "13:17:17 [DEBUG] train episode 1269: reward = 183.00, steps = 183\n",
      "13:17:23 [DEBUG] train episode 1270: reward = 111.00, steps = 111\n",
      "13:17:30 [DEBUG] train episode 1271: reward = 135.00, steps = 135\n",
      "13:17:36 [DEBUG] train episode 1272: reward = 102.00, steps = 102\n",
      "13:17:41 [DEBUG] train episode 1273: reward = 91.00, steps = 91\n",
      "13:17:47 [DEBUG] train episode 1274: reward = 108.00, steps = 108\n",
      "13:17:52 [DEBUG] train episode 1275: reward = 80.00, steps = 80\n",
      "13:17:59 [DEBUG] train episode 1276: reward = 127.00, steps = 127\n",
      "13:18:07 [DEBUG] train episode 1277: reward = 132.00, steps = 132\n",
      "13:18:16 [DEBUG] train episode 1278: reward = 156.00, steps = 156\n",
      "13:18:22 [DEBUG] train episode 1279: reward = 110.00, steps = 110\n",
      "13:18:28 [DEBUG] train episode 1280: reward = 107.00, steps = 107\n",
      "13:18:34 [DEBUG] train episode 1281: reward = 104.00, steps = 104\n",
      "13:18:39 [DEBUG] train episode 1282: reward = 94.00, steps = 94\n",
      "13:18:46 [DEBUG] train episode 1283: reward = 123.00, steps = 123\n",
      "13:18:53 [DEBUG] train episode 1284: reward = 131.00, steps = 131\n",
      "13:19:00 [DEBUG] train episode 1285: reward = 113.00, steps = 113\n",
      "13:19:05 [DEBUG] train episode 1286: reward = 106.00, steps = 106\n",
      "13:19:16 [DEBUG] train episode 1287: reward = 160.00, steps = 160\n",
      "13:19:24 [DEBUG] train episode 1288: reward = 104.00, steps = 104\n",
      "13:19:29 [DEBUG] train episode 1289: reward = 86.00, steps = 86\n",
      "13:19:36 [DEBUG] train episode 1290: reward = 96.00, steps = 96\n",
      "13:19:43 [DEBUG] train episode 1291: reward = 120.00, steps = 120\n",
      "13:19:49 [DEBUG] train episode 1292: reward = 111.00, steps = 111\n",
      "13:19:57 [DEBUG] train episode 1293: reward = 129.00, steps = 129\n",
      "13:20:05 [DEBUG] train episode 1294: reward = 131.00, steps = 131\n",
      "13:20:13 [DEBUG] train episode 1295: reward = 143.00, steps = 143\n",
      "13:20:21 [DEBUG] train episode 1296: reward = 126.00, steps = 126\n",
      "13:20:31 [DEBUG] train episode 1297: reward = 167.00, steps = 167\n",
      "13:20:39 [DEBUG] train episode 1298: reward = 135.00, steps = 135\n",
      "13:20:46 [DEBUG] train episode 1299: reward = 117.00, steps = 117\n",
      "13:20:53 [DEBUG] train episode 1300: reward = 126.00, steps = 126\n",
      "13:21:00 [DEBUG] train episode 1301: reward = 106.00, steps = 106\n",
      "13:21:06 [DEBUG] train episode 1302: reward = 107.00, steps = 107\n",
      "13:21:14 [DEBUG] train episode 1303: reward = 141.00, steps = 141\n",
      "13:21:21 [DEBUG] train episode 1304: reward = 123.00, steps = 123\n",
      "13:21:28 [DEBUG] train episode 1305: reward = 117.00, steps = 117\n",
      "13:21:36 [DEBUG] train episode 1306: reward = 136.00, steps = 136\n",
      "13:21:46 [DEBUG] train episode 1307: reward = 172.00, steps = 172\n",
      "13:21:56 [DEBUG] train episode 1308: reward = 172.00, steps = 172\n",
      "13:22:03 [DEBUG] train episode 1309: reward = 113.00, steps = 113\n",
      "13:22:11 [DEBUG] train episode 1310: reward = 142.00, steps = 142\n",
      "13:22:18 [DEBUG] train episode 1311: reward = 132.00, steps = 132\n",
      "13:22:23 [DEBUG] train episode 1312: reward = 92.00, steps = 92\n",
      "13:22:31 [DEBUG] train episode 1313: reward = 133.00, steps = 133\n",
      "13:22:41 [DEBUG] train episode 1314: reward = 148.00, steps = 148\n",
      "13:22:48 [DEBUG] train episode 1315: reward = 120.00, steps = 120\n",
      "13:22:58 [DEBUG] train episode 1316: reward = 142.00, steps = 142\n",
      "13:23:07 [DEBUG] train episode 1317: reward = 147.00, steps = 147\n",
      "13:23:14 [DEBUG] train episode 1318: reward = 133.00, steps = 133\n",
      "13:23:27 [DEBUG] train episode 1319: reward = 200.00, steps = 200\n",
      "13:23:39 [DEBUG] train episode 1320: reward = 187.00, steps = 187\n",
      "13:23:49 [DEBUG] train episode 1321: reward = 187.00, steps = 187\n",
      "13:23:58 [DEBUG] train episode 1322: reward = 163.00, steps = 163\n",
      "13:24:08 [DEBUG] train episode 1323: reward = 187.00, steps = 187\n",
      "13:24:18 [DEBUG] train episode 1324: reward = 182.00, steps = 182\n",
      "13:24:28 [DEBUG] train episode 1325: reward = 195.00, steps = 195\n",
      "13:24:37 [DEBUG] train episode 1326: reward = 151.00, steps = 151\n",
      "13:24:43 [DEBUG] train episode 1327: reward = 117.00, steps = 117\n",
      "13:24:53 [DEBUG] train episode 1328: reward = 181.00, steps = 181\n",
      "13:25:01 [DEBUG] train episode 1329: reward = 147.00, steps = 147\n",
      "13:25:08 [DEBUG] train episode 1330: reward = 122.00, steps = 122\n",
      "13:25:19 [DEBUG] train episode 1331: reward = 200.00, steps = 200\n",
      "13:25:30 [DEBUG] train episode 1332: reward = 200.00, steps = 200\n",
      "13:25:38 [DEBUG] train episode 1333: reward = 154.00, steps = 154\n",
      "13:25:46 [DEBUG] train episode 1334: reward = 144.00, steps = 144\n",
      "13:25:53 [DEBUG] train episode 1335: reward = 132.00, steps = 132\n",
      "13:26:04 [DEBUG] train episode 1336: reward = 200.00, steps = 200\n",
      "13:26:11 [DEBUG] train episode 1337: reward = 137.00, steps = 137\n",
      "13:26:21 [DEBUG] train episode 1338: reward = 166.00, steps = 166\n",
      "13:26:31 [DEBUG] train episode 1339: reward = 200.00, steps = 200\n",
      "13:26:38 [DEBUG] train episode 1340: reward = 125.00, steps = 125\n",
      "13:26:45 [DEBUG] train episode 1341: reward = 131.00, steps = 131\n",
      "13:26:53 [DEBUG] train episode 1342: reward = 153.00, steps = 153\n",
      "13:27:02 [DEBUG] train episode 1343: reward = 164.00, steps = 164\n",
      "13:27:08 [DEBUG] train episode 1344: reward = 107.00, steps = 107\n",
      "13:27:16 [DEBUG] train episode 1345: reward = 143.00, steps = 143\n",
      "13:27:25 [DEBUG] train episode 1346: reward = 160.00, steps = 160\n",
      "13:27:32 [DEBUG] train episode 1347: reward = 127.00, steps = 127\n",
      "13:27:41 [DEBUG] train episode 1348: reward = 165.00, steps = 165\n",
      "13:27:50 [DEBUG] train episode 1349: reward = 173.00, steps = 173\n",
      "13:28:00 [DEBUG] train episode 1350: reward = 173.00, steps = 173\n",
      "13:28:10 [DEBUG] train episode 1351: reward = 200.00, steps = 200\n",
      "13:28:17 [DEBUG] train episode 1352: reward = 120.00, steps = 120\n",
      "13:28:23 [DEBUG] train episode 1353: reward = 113.00, steps = 113\n",
      "13:28:30 [DEBUG] train episode 1354: reward = 132.00, steps = 132\n",
      "13:28:40 [DEBUG] train episode 1355: reward = 171.00, steps = 171\n",
      "13:28:46 [DEBUG] train episode 1356: reward = 112.00, steps = 112\n",
      "13:28:54 [DEBUG] train episode 1357: reward = 162.00, steps = 162\n",
      "13:29:03 [DEBUG] train episode 1358: reward = 162.00, steps = 162\n",
      "13:29:10 [DEBUG] train episode 1359: reward = 131.00, steps = 131\n",
      "13:29:19 [DEBUG] train episode 1360: reward = 153.00, steps = 153\n",
      "13:29:24 [DEBUG] train episode 1361: reward = 105.00, steps = 105\n",
      "13:29:32 [DEBUG] train episode 1362: reward = 145.00, steps = 145\n",
      "13:29:42 [DEBUG] train episode 1363: reward = 173.00, steps = 173\n",
      "13:29:50 [DEBUG] train episode 1364: reward = 147.00, steps = 147\n",
      "13:29:59 [DEBUG] train episode 1365: reward = 179.00, steps = 179\n",
      "13:30:10 [DEBUG] train episode 1366: reward = 200.00, steps = 200\n",
      "13:30:19 [DEBUG] train episode 1367: reward = 168.00, steps = 168\n",
      "13:30:27 [DEBUG] train episode 1368: reward = 144.00, steps = 144\n",
      "13:30:33 [DEBUG] train episode 1369: reward = 110.00, steps = 110\n",
      "13:30:39 [DEBUG] train episode 1370: reward = 102.00, steps = 102\n",
      "13:30:46 [DEBUG] train episode 1371: reward = 136.00, steps = 136\n",
      "13:30:57 [DEBUG] train episode 1372: reward = 200.00, steps = 200\n",
      "13:31:06 [DEBUG] train episode 1373: reward = 165.00, steps = 165\n",
      "13:31:16 [DEBUG] train episode 1374: reward = 187.00, steps = 187\n",
      "13:31:23 [DEBUG] train episode 1375: reward = 126.00, steps = 126\n",
      "13:31:30 [DEBUG] train episode 1376: reward = 124.00, steps = 124\n",
      "13:31:36 [DEBUG] train episode 1377: reward = 104.00, steps = 104\n",
      "13:31:44 [DEBUG] train episode 1378: reward = 163.00, steps = 163\n",
      "13:31:51 [DEBUG] train episode 1379: reward = 126.00, steps = 126\n",
      "13:31:58 [DEBUG] train episode 1380: reward = 120.00, steps = 120\n",
      "13:32:04 [DEBUG] train episode 1381: reward = 106.00, steps = 106\n",
      "13:32:10 [DEBUG] train episode 1382: reward = 115.00, steps = 115\n",
      "13:32:19 [DEBUG] train episode 1383: reward = 166.00, steps = 166\n",
      "13:32:26 [DEBUG] train episode 1384: reward = 123.00, steps = 123\n",
      "13:32:37 [DEBUG] train episode 1385: reward = 200.00, steps = 200\n",
      "13:32:45 [DEBUG] train episode 1386: reward = 154.00, steps = 154\n",
      "13:32:53 [DEBUG] train episode 1387: reward = 132.00, steps = 132\n",
      "13:33:03 [DEBUG] train episode 1388: reward = 200.00, steps = 200\n",
      "13:33:13 [DEBUG] train episode 1389: reward = 169.00, steps = 169\n",
      "13:33:23 [DEBUG] train episode 1390: reward = 200.00, steps = 200\n",
      "13:33:30 [DEBUG] train episode 1391: reward = 114.00, steps = 114\n",
      "13:33:37 [DEBUG] train episode 1392: reward = 132.00, steps = 132\n",
      "13:33:45 [DEBUG] train episode 1393: reward = 148.00, steps = 148\n",
      "13:33:52 [DEBUG] train episode 1394: reward = 132.00, steps = 132\n",
      "13:34:03 [DEBUG] train episode 1395: reward = 189.00, steps = 189\n",
      "13:34:13 [DEBUG] train episode 1396: reward = 152.00, steps = 152\n",
      "13:34:24 [DEBUG] train episode 1397: reward = 158.00, steps = 158\n",
      "13:34:29 [DEBUG] train episode 1398: reward = 90.00, steps = 90\n",
      "13:34:42 [DEBUG] train episode 1399: reward = 200.00, steps = 200\n",
      "13:34:52 [DEBUG] train episode 1400: reward = 149.00, steps = 149\n",
      "13:35:02 [DEBUG] train episode 1401: reward = 143.00, steps = 143\n",
      "13:35:12 [DEBUG] train episode 1402: reward = 163.00, steps = 163\n",
      "13:35:19 [DEBUG] train episode 1403: reward = 109.00, steps = 109\n",
      "13:35:29 [DEBUG] train episode 1404: reward = 149.00, steps = 149\n",
      "13:35:37 [DEBUG] train episode 1405: reward = 134.00, steps = 134\n",
      "13:35:43 [DEBUG] train episode 1406: reward = 98.00, steps = 98\n",
      "13:35:52 [DEBUG] train episode 1407: reward = 144.00, steps = 144\n",
      "13:36:01 [DEBUG] train episode 1408: reward = 146.00, steps = 146\n",
      "13:36:11 [DEBUG] train episode 1409: reward = 170.00, steps = 170\n",
      "13:36:21 [DEBUG] train episode 1410: reward = 157.00, steps = 157\n",
      "13:36:31 [DEBUG] train episode 1411: reward = 146.00, steps = 146\n",
      "13:36:44 [DEBUG] train episode 1412: reward = 189.00, steps = 189\n",
      "13:36:52 [DEBUG] train episode 1413: reward = 112.00, steps = 112\n",
      "13:37:01 [DEBUG] train episode 1414: reward = 162.00, steps = 162\n",
      "13:37:09 [DEBUG] train episode 1415: reward = 133.00, steps = 133\n",
      "13:37:17 [DEBUG] train episode 1416: reward = 153.00, steps = 153\n",
      "13:37:24 [DEBUG] train episode 1417: reward = 122.00, steps = 122\n",
      "13:37:31 [DEBUG] train episode 1418: reward = 119.00, steps = 119\n",
      "13:37:41 [DEBUG] train episode 1419: reward = 175.00, steps = 175\n",
      "13:37:50 [DEBUG] train episode 1420: reward = 181.00, steps = 181\n",
      "13:37:58 [DEBUG] train episode 1421: reward = 152.00, steps = 152\n",
      "13:38:04 [DEBUG] train episode 1422: reward = 134.00, steps = 134\n",
      "13:38:12 [DEBUG] train episode 1423: reward = 158.00, steps = 158\n",
      "13:38:20 [DEBUG] train episode 1424: reward = 149.00, steps = 149\n",
      "13:38:26 [DEBUG] train episode 1425: reward = 132.00, steps = 132\n",
      "13:38:34 [DEBUG] train episode 1426: reward = 156.00, steps = 156\n",
      "13:38:44 [DEBUG] train episode 1427: reward = 173.00, steps = 173\n",
      "13:38:51 [DEBUG] train episode 1428: reward = 122.00, steps = 122\n",
      "13:39:04 [DEBUG] train episode 1429: reward = 200.00, steps = 200\n",
      "13:39:14 [DEBUG] train episode 1430: reward = 135.00, steps = 135\n",
      "13:39:24 [DEBUG] train episode 1431: reward = 115.00, steps = 115\n",
      "13:39:33 [DEBUG] train episode 1432: reward = 119.00, steps = 119\n",
      "13:39:47 [DEBUG] train episode 1433: reward = 196.00, steps = 196\n",
      "13:39:56 [DEBUG] train episode 1434: reward = 157.00, steps = 157\n",
      "13:40:08 [DEBUG] train episode 1435: reward = 200.00, steps = 200\n",
      "13:40:16 [DEBUG] train episode 1436: reward = 152.00, steps = 152\n",
      "13:40:27 [DEBUG] train episode 1437: reward = 188.00, steps = 188\n",
      "13:40:37 [DEBUG] train episode 1438: reward = 167.00, steps = 167\n",
      "13:40:46 [DEBUG] train episode 1439: reward = 128.00, steps = 128\n",
      "13:40:54 [DEBUG] train episode 1440: reward = 156.00, steps = 156\n",
      "13:41:01 [DEBUG] train episode 1441: reward = 125.00, steps = 125\n",
      "13:41:08 [DEBUG] train episode 1442: reward = 128.00, steps = 128\n",
      "13:41:18 [DEBUG] train episode 1443: reward = 178.00, steps = 178\n",
      "13:41:28 [DEBUG] train episode 1444: reward = 181.00, steps = 181\n",
      "13:41:35 [DEBUG] train episode 1445: reward = 148.00, steps = 148\n",
      "13:41:41 [DEBUG] train episode 1446: reward = 104.00, steps = 104\n",
      "13:41:50 [DEBUG] train episode 1447: reward = 184.00, steps = 184\n",
      "13:41:55 [DEBUG] train episode 1448: reward = 117.00, steps = 117\n",
      "13:42:02 [DEBUG] train episode 1449: reward = 136.00, steps = 136\n",
      "13:42:09 [DEBUG] train episode 1450: reward = 140.00, steps = 140\n",
      "13:42:17 [DEBUG] train episode 1451: reward = 150.00, steps = 150\n",
      "13:42:24 [DEBUG] train episode 1452: reward = 134.00, steps = 134\n",
      "13:42:31 [DEBUG] train episode 1453: reward = 155.00, steps = 155\n",
      "13:42:40 [DEBUG] train episode 1454: reward = 170.00, steps = 170\n",
      "13:42:48 [DEBUG] train episode 1455: reward = 168.00, steps = 168\n",
      "13:42:57 [DEBUG] train episode 1456: reward = 188.00, steps = 188\n",
      "13:43:02 [DEBUG] train episode 1457: reward = 105.00, steps = 105\n",
      "13:43:11 [DEBUG] train episode 1458: reward = 169.00, steps = 169\n",
      "13:43:17 [DEBUG] train episode 1459: reward = 137.00, steps = 137\n",
      "13:43:24 [DEBUG] train episode 1460: reward = 132.00, steps = 132\n",
      "13:43:33 [DEBUG] train episode 1461: reward = 185.00, steps = 185\n",
      "13:43:41 [DEBUG] train episode 1462: reward = 160.00, steps = 160\n",
      "13:43:46 [DEBUG] train episode 1463: reward = 121.00, steps = 121\n",
      "13:43:56 [DEBUG] train episode 1464: reward = 200.00, steps = 200\n",
      "13:44:01 [DEBUG] train episode 1465: reward = 98.00, steps = 98\n",
      "13:44:09 [DEBUG] train episode 1466: reward = 159.00, steps = 159\n",
      "13:44:15 [DEBUG] train episode 1467: reward = 126.00, steps = 126\n",
      "13:44:21 [DEBUG] train episode 1468: reward = 128.00, steps = 128\n",
      "13:44:31 [DEBUG] train episode 1469: reward = 200.00, steps = 200\n",
      "13:44:38 [DEBUG] train episode 1470: reward = 140.00, steps = 140\n",
      "13:44:48 [DEBUG] train episode 1471: reward = 200.00, steps = 200\n",
      "13:44:54 [DEBUG] train episode 1472: reward = 124.00, steps = 124\n",
      "13:45:02 [DEBUG] train episode 1473: reward = 164.00, steps = 164\n",
      "13:45:08 [DEBUG] train episode 1474: reward = 121.00, steps = 121\n",
      "13:45:14 [DEBUG] train episode 1475: reward = 115.00, steps = 115\n",
      "13:45:21 [DEBUG] train episode 1476: reward = 152.00, steps = 152\n",
      "13:45:27 [DEBUG] train episode 1477: reward = 112.00, steps = 112\n",
      "13:45:35 [DEBUG] train episode 1478: reward = 173.00, steps = 173\n",
      "13:45:43 [DEBUG] train episode 1479: reward = 146.00, steps = 146\n",
      "13:45:50 [DEBUG] train episode 1480: reward = 141.00, steps = 141\n",
      "13:45:56 [DEBUG] train episode 1481: reward = 122.00, steps = 122\n",
      "13:46:05 [DEBUG] train episode 1482: reward = 200.00, steps = 200\n",
      "13:46:12 [DEBUG] train episode 1483: reward = 128.00, steps = 128\n",
      "13:46:22 [DEBUG] train episode 1484: reward = 198.00, steps = 198\n",
      "13:46:31 [DEBUG] train episode 1485: reward = 200.00, steps = 200\n",
      "13:46:39 [DEBUG] train episode 1486: reward = 165.00, steps = 165\n",
      "13:46:46 [DEBUG] train episode 1487: reward = 138.00, steps = 138\n",
      "13:46:56 [DEBUG] train episode 1488: reward = 200.00, steps = 200\n",
      "13:47:06 [DEBUG] train episode 1489: reward = 195.00, steps = 195\n",
      "13:47:15 [DEBUG] train episode 1490: reward = 193.00, steps = 193\n",
      "13:47:24 [DEBUG] train episode 1491: reward = 177.00, steps = 177\n",
      "13:47:34 [DEBUG] train episode 1492: reward = 187.00, steps = 187\n",
      "13:47:42 [DEBUG] train episode 1493: reward = 161.00, steps = 161\n",
      "13:47:48 [DEBUG] train episode 1494: reward = 111.00, steps = 111\n",
      "13:47:56 [DEBUG] train episode 1495: reward = 148.00, steps = 148\n",
      "13:48:06 [DEBUG] train episode 1496: reward = 200.00, steps = 200\n",
      "13:48:14 [DEBUG] train episode 1497: reward = 157.00, steps = 157\n",
      "13:48:22 [DEBUG] train episode 1498: reward = 152.00, steps = 152\n",
      "13:48:30 [DEBUG] train episode 1499: reward = 140.00, steps = 140\n",
      "13:48:37 [DEBUG] train episode 1500: reward = 136.00, steps = 136\n",
      "13:48:45 [DEBUG] train episode 1501: reward = 149.00, steps = 149\n",
      "13:48:55 [DEBUG] train episode 1502: reward = 200.00, steps = 200\n",
      "13:49:06 [DEBUG] train episode 1503: reward = 200.00, steps = 200\n",
      "13:49:15 [DEBUG] train episode 1504: reward = 174.00, steps = 174\n",
      "13:49:23 [DEBUG] train episode 1505: reward = 161.00, steps = 161\n",
      "13:49:30 [DEBUG] train episode 1506: reward = 137.00, steps = 137\n",
      "13:49:38 [DEBUG] train episode 1507: reward = 139.00, steps = 139\n",
      "13:49:44 [DEBUG] train episode 1508: reward = 122.00, steps = 122\n",
      "13:49:55 [DEBUG] train episode 1509: reward = 200.00, steps = 200\n",
      "13:50:05 [DEBUG] train episode 1510: reward = 200.00, steps = 200\n",
      "13:50:12 [DEBUG] train episode 1511: reward = 127.00, steps = 127\n",
      "13:50:18 [DEBUG] train episode 1512: reward = 125.00, steps = 125\n",
      "13:50:26 [DEBUG] train episode 1513: reward = 152.00, steps = 152\n",
      "13:50:33 [DEBUG] train episode 1514: reward = 118.00, steps = 118\n",
      "13:50:43 [DEBUG] train episode 1515: reward = 197.00, steps = 197\n",
      "13:50:52 [DEBUG] train episode 1516: reward = 169.00, steps = 169\n",
      "13:51:01 [DEBUG] train episode 1517: reward = 183.00, steps = 183\n",
      "13:51:11 [DEBUG] train episode 1518: reward = 189.00, steps = 189\n",
      "13:51:17 [DEBUG] train episode 1519: reward = 110.00, steps = 110\n",
      "13:51:25 [DEBUG] train episode 1520: reward = 152.00, steps = 152\n",
      "13:51:34 [DEBUG] train episode 1521: reward = 176.00, steps = 176\n",
      "13:51:43 [DEBUG] train episode 1522: reward = 162.00, steps = 162\n",
      "13:51:50 [DEBUG] train episode 1523: reward = 143.00, steps = 143\n",
      "13:51:56 [DEBUG] train episode 1524: reward = 113.00, steps = 113\n",
      "13:52:05 [DEBUG] train episode 1525: reward = 179.00, steps = 179\n",
      "13:52:14 [DEBUG] train episode 1526: reward = 163.00, steps = 163\n",
      "13:52:23 [DEBUG] train episode 1527: reward = 170.00, steps = 170\n",
      "13:52:34 [DEBUG] train episode 1528: reward = 200.00, steps = 200\n",
      "13:52:43 [DEBUG] train episode 1529: reward = 173.00, steps = 173\n",
      "13:52:51 [DEBUG] train episode 1530: reward = 160.00, steps = 160\n",
      "13:52:58 [DEBUG] train episode 1531: reward = 140.00, steps = 140\n",
      "13:53:05 [DEBUG] train episode 1532: reward = 126.00, steps = 126\n",
      "13:53:12 [DEBUG] train episode 1533: reward = 144.00, steps = 144\n",
      "13:53:19 [DEBUG] train episode 1534: reward = 132.00, steps = 132\n",
      "13:53:29 [DEBUG] train episode 1535: reward = 178.00, steps = 178\n",
      "13:53:39 [DEBUG] train episode 1536: reward = 189.00, steps = 189\n",
      "13:53:49 [DEBUG] train episode 1537: reward = 200.00, steps = 200\n",
      "13:53:59 [DEBUG] train episode 1538: reward = 200.00, steps = 200\n",
      "13:54:06 [DEBUG] train episode 1539: reward = 123.00, steps = 123\n",
      "13:54:12 [DEBUG] train episode 1540: reward = 113.00, steps = 113\n",
      "13:54:19 [DEBUG] train episode 1541: reward = 141.00, steps = 141\n",
      "13:54:27 [DEBUG] train episode 1542: reward = 146.00, steps = 146\n",
      "13:54:38 [DEBUG] train episode 1543: reward = 200.00, steps = 200\n",
      "13:54:45 [DEBUG] train episode 1544: reward = 137.00, steps = 137\n",
      "13:54:54 [DEBUG] train episode 1545: reward = 186.00, steps = 186\n",
      "13:55:03 [DEBUG] train episode 1546: reward = 159.00, steps = 159\n",
      "13:55:13 [DEBUG] train episode 1547: reward = 199.00, steps = 199\n",
      "13:55:21 [DEBUG] train episode 1548: reward = 144.00, steps = 144\n",
      "13:55:28 [DEBUG] train episode 1549: reward = 141.00, steps = 141\n",
      "13:55:39 [DEBUG] train episode 1550: reward = 200.00, steps = 200\n",
      "13:55:49 [DEBUG] train episode 1551: reward = 200.00, steps = 200\n",
      "13:56:00 [DEBUG] train episode 1552: reward = 200.00, steps = 200\n",
      "13:56:10 [DEBUG] train episode 1553: reward = 200.00, steps = 200\n",
      "13:56:19 [DEBUG] train episode 1554: reward = 174.00, steps = 174\n",
      "13:56:26 [DEBUG] train episode 1555: reward = 129.00, steps = 129\n",
      "13:56:33 [DEBUG] train episode 1556: reward = 140.00, steps = 140\n",
      "13:56:44 [DEBUG] train episode 1557: reward = 200.00, steps = 200\n",
      "13:56:50 [DEBUG] train episode 1558: reward = 122.00, steps = 122\n",
      "13:57:01 [DEBUG] train episode 1559: reward = 200.00, steps = 200\n",
      "13:57:11 [DEBUG] train episode 1560: reward = 200.00, steps = 200\n",
      "13:57:18 [DEBUG] train episode 1561: reward = 123.00, steps = 123\n",
      "13:57:27 [DEBUG] train episode 1562: reward = 178.00, steps = 178\n",
      "13:57:33 [DEBUG] train episode 1563: reward = 106.00, steps = 106\n",
      "13:57:40 [DEBUG] train episode 1564: reward = 141.00, steps = 141\n",
      "13:57:50 [DEBUG] train episode 1565: reward = 200.00, steps = 200\n",
      "13:57:58 [DEBUG] train episode 1566: reward = 145.00, steps = 145\n",
      "13:58:05 [DEBUG] train episode 1567: reward = 139.00, steps = 139\n",
      "13:58:13 [DEBUG] train episode 1568: reward = 148.00, steps = 148\n",
      "13:58:22 [DEBUG] train episode 1569: reward = 169.00, steps = 169\n",
      "13:58:29 [DEBUG] train episode 1570: reward = 140.00, steps = 140\n",
      "13:58:39 [DEBUG] train episode 1571: reward = 176.00, steps = 176\n",
      "13:58:46 [DEBUG] train episode 1572: reward = 139.00, steps = 139\n",
      "13:58:53 [DEBUG] train episode 1573: reward = 129.00, steps = 129\n",
      "13:59:01 [DEBUG] train episode 1574: reward = 146.00, steps = 146\n",
      "13:59:08 [DEBUG] train episode 1575: reward = 132.00, steps = 132\n",
      "13:59:14 [DEBUG] train episode 1576: reward = 121.00, steps = 121\n",
      "13:59:21 [DEBUG] train episode 1577: reward = 136.00, steps = 136\n",
      "13:59:32 [DEBUG] train episode 1578: reward = 200.00, steps = 200\n",
      "13:59:37 [DEBUG] train episode 1579: reward = 107.00, steps = 107\n",
      "13:59:44 [DEBUG] train episode 1580: reward = 132.00, steps = 132\n",
      "13:59:54 [DEBUG] train episode 1581: reward = 181.00, steps = 181\n",
      "13:59:59 [DEBUG] train episode 1582: reward = 109.00, steps = 109\n",
      "14:00:09 [DEBUG] train episode 1583: reward = 184.00, steps = 184\n",
      "14:00:19 [DEBUG] train episode 1584: reward = 193.00, steps = 193\n",
      "14:00:26 [DEBUG] train episode 1585: reward = 132.00, steps = 132\n",
      "14:00:34 [DEBUG] train episode 1586: reward = 157.00, steps = 157\n",
      "14:00:43 [DEBUG] train episode 1587: reward = 158.00, steps = 158\n",
      "14:00:53 [DEBUG] train episode 1588: reward = 184.00, steps = 184\n",
      "14:01:03 [DEBUG] train episode 1589: reward = 200.00, steps = 200\n",
      "14:01:10 [DEBUG] train episode 1590: reward = 129.00, steps = 129\n",
      "14:01:17 [DEBUG] train episode 1591: reward = 144.00, steps = 144\n",
      "14:01:25 [DEBUG] train episode 1592: reward = 148.00, steps = 148\n",
      "14:01:35 [DEBUG] train episode 1593: reward = 188.00, steps = 188\n",
      "14:01:42 [DEBUG] train episode 1594: reward = 123.00, steps = 123\n",
      "14:01:50 [DEBUG] train episode 1595: reward = 160.00, steps = 160\n",
      "14:01:56 [DEBUG] train episode 1596: reward = 111.00, steps = 111\n",
      "14:02:04 [DEBUG] train episode 1597: reward = 145.00, steps = 145\n",
      "14:02:14 [DEBUG] train episode 1598: reward = 195.00, steps = 195\n",
      "14:02:24 [DEBUG] train episode 1599: reward = 200.00, steps = 200\n",
      "14:02:33 [DEBUG] train episode 1600: reward = 164.00, steps = 164\n",
      "14:02:39 [DEBUG] train episode 1601: reward = 122.00, steps = 122\n",
      "14:02:47 [DEBUG] train episode 1602: reward = 151.00, steps = 151\n",
      "14:02:58 [DEBUG] train episode 1603: reward = 200.00, steps = 200\n",
      "14:03:04 [DEBUG] train episode 1604: reward = 118.00, steps = 118\n",
      "14:03:12 [DEBUG] train episode 1605: reward = 141.00, steps = 141\n",
      "14:03:20 [DEBUG] train episode 1606: reward = 162.00, steps = 162\n",
      "14:03:28 [DEBUG] train episode 1607: reward = 145.00, steps = 145\n",
      "14:03:37 [DEBUG] train episode 1608: reward = 178.00, steps = 178\n",
      "14:03:45 [DEBUG] train episode 1609: reward = 145.00, steps = 145\n",
      "14:03:55 [DEBUG] train episode 1610: reward = 200.00, steps = 200\n",
      "14:04:02 [DEBUG] train episode 1611: reward = 138.00, steps = 138\n",
      "14:04:10 [DEBUG] train episode 1612: reward = 144.00, steps = 144\n",
      "14:04:20 [DEBUG] train episode 1613: reward = 200.00, steps = 200\n",
      "14:04:28 [DEBUG] train episode 1614: reward = 138.00, steps = 138\n",
      "14:04:38 [DEBUG] train episode 1615: reward = 187.00, steps = 187\n",
      "14:04:44 [DEBUG] train episode 1616: reward = 115.00, steps = 115\n",
      "14:04:50 [DEBUG] train episode 1617: reward = 119.00, steps = 119\n",
      "14:04:58 [DEBUG] train episode 1618: reward = 150.00, steps = 150\n",
      "14:05:09 [DEBUG] train episode 1619: reward = 200.00, steps = 200\n",
      "14:05:19 [DEBUG] train episode 1620: reward = 200.00, steps = 200\n",
      "14:05:30 [DEBUG] train episode 1621: reward = 200.00, steps = 200\n",
      "14:05:38 [DEBUG] train episode 1622: reward = 155.00, steps = 155\n",
      "14:05:45 [DEBUG] train episode 1623: reward = 142.00, steps = 142\n",
      "14:05:53 [DEBUG] train episode 1624: reward = 140.00, steps = 140\n",
      "14:05:59 [DEBUG] train episode 1625: reward = 132.00, steps = 132\n",
      "14:06:06 [DEBUG] train episode 1626: reward = 120.00, steps = 120\n",
      "14:06:13 [DEBUG] train episode 1627: reward = 132.00, steps = 132\n",
      "14:06:23 [DEBUG] train episode 1628: reward = 200.00, steps = 200\n",
      "14:06:29 [DEBUG] train episode 1629: reward = 113.00, steps = 113\n",
      "14:06:40 [DEBUG] train episode 1630: reward = 195.00, steps = 195\n",
      "14:06:45 [DEBUG] train episode 1631: reward = 106.00, steps = 106\n",
      "14:06:51 [DEBUG] train episode 1632: reward = 105.00, steps = 105\n",
      "14:07:01 [DEBUG] train episode 1633: reward = 189.00, steps = 189\n",
      "14:07:09 [DEBUG] train episode 1634: reward = 166.00, steps = 166\n",
      "14:07:19 [DEBUG] train episode 1635: reward = 179.00, steps = 179\n",
      "14:07:27 [DEBUG] train episode 1636: reward = 154.00, steps = 154\n",
      "14:07:35 [DEBUG] train episode 1637: reward = 151.00, steps = 151\n",
      "14:07:45 [DEBUG] train episode 1638: reward = 200.00, steps = 200\n",
      "14:07:56 [DEBUG] train episode 1639: reward = 200.00, steps = 200\n",
      "14:08:02 [DEBUG] train episode 1640: reward = 118.00, steps = 118\n",
      "14:08:13 [DEBUG] train episode 1641: reward = 200.00, steps = 200\n",
      "14:08:23 [DEBUG] train episode 1642: reward = 191.00, steps = 191\n",
      "14:08:30 [DEBUG] train episode 1643: reward = 138.00, steps = 138\n",
      "14:08:37 [DEBUG] train episode 1644: reward = 133.00, steps = 133\n",
      "14:08:44 [DEBUG] train episode 1645: reward = 127.00, steps = 127\n",
      "14:08:53 [DEBUG] train episode 1646: reward = 178.00, steps = 178\n",
      "14:08:58 [DEBUG] train episode 1647: reward = 97.00, steps = 97\n",
      "14:09:08 [DEBUG] train episode 1648: reward = 186.00, steps = 186\n",
      "14:09:16 [DEBUG] train episode 1649: reward = 155.00, steps = 155\n",
      "14:09:22 [DEBUG] train episode 1650: reward = 117.00, steps = 117\n",
      "14:09:32 [DEBUG] train episode 1651: reward = 179.00, steps = 179\n",
      "14:09:40 [DEBUG] train episode 1652: reward = 154.00, steps = 154\n",
      "14:09:49 [DEBUG] train episode 1653: reward = 140.00, steps = 140\n",
      "14:09:58 [DEBUG] train episode 1654: reward = 175.00, steps = 175\n",
      "14:10:09 [DEBUG] train episode 1655: reward = 199.00, steps = 199\n",
      "14:10:18 [DEBUG] train episode 1656: reward = 186.00, steps = 186\n",
      "14:10:29 [DEBUG] train episode 1657: reward = 200.00, steps = 200\n",
      "14:10:36 [DEBUG] train episode 1658: reward = 136.00, steps = 136\n",
      "14:10:44 [DEBUG] train episode 1659: reward = 144.00, steps = 144\n",
      "14:10:52 [DEBUG] train episode 1660: reward = 157.00, steps = 157\n",
      "14:10:59 [DEBUG] train episode 1661: reward = 136.00, steps = 136\n",
      "14:11:07 [DEBUG] train episode 1662: reward = 132.00, steps = 132\n",
      "14:11:14 [DEBUG] train episode 1663: reward = 138.00, steps = 138\n",
      "14:11:23 [DEBUG] train episode 1664: reward = 165.00, steps = 165\n",
      "14:11:33 [DEBUG] train episode 1665: reward = 195.00, steps = 195\n",
      "14:11:43 [DEBUG] train episode 1666: reward = 200.00, steps = 200\n",
      "14:11:50 [DEBUG] train episode 1667: reward = 132.00, steps = 132\n",
      "14:12:01 [DEBUG] train episode 1668: reward = 200.00, steps = 200\n",
      "14:12:11 [DEBUG] train episode 1669: reward = 187.00, steps = 187\n",
      "14:12:18 [DEBUG] train episode 1670: reward = 139.00, steps = 139\n",
      "14:12:25 [DEBUG] train episode 1671: reward = 140.00, steps = 140\n",
      "14:12:33 [DEBUG] train episode 1672: reward = 156.00, steps = 156\n",
      "14:12:41 [DEBUG] train episode 1673: reward = 134.00, steps = 134\n",
      "14:12:51 [DEBUG] train episode 1674: reward = 200.00, steps = 200\n",
      "14:12:57 [DEBUG] train episode 1675: reward = 118.00, steps = 118\n",
      "14:13:04 [DEBUG] train episode 1676: reward = 120.00, steps = 120\n",
      "14:13:12 [DEBUG] train episode 1677: reward = 168.00, steps = 168\n",
      "14:13:22 [DEBUG] train episode 1678: reward = 163.00, steps = 163\n",
      "14:13:30 [DEBUG] train episode 1679: reward = 137.00, steps = 137\n",
      "14:13:38 [DEBUG] train episode 1680: reward = 158.00, steps = 158\n",
      "14:13:47 [DEBUG] train episode 1681: reward = 161.00, steps = 161\n",
      "14:13:55 [DEBUG] train episode 1682: reward = 134.00, steps = 134\n",
      "14:14:06 [DEBUG] train episode 1683: reward = 200.00, steps = 200\n",
      "14:14:14 [DEBUG] train episode 1684: reward = 153.00, steps = 153\n",
      "14:14:23 [DEBUG] train episode 1685: reward = 166.00, steps = 166\n",
      "14:14:36 [DEBUG] train episode 1686: reward = 200.00, steps = 200\n",
      "14:14:46 [DEBUG] train episode 1687: reward = 171.00, steps = 171\n",
      "14:14:52 [DEBUG] train episode 1688: reward = 110.00, steps = 110\n",
      "14:14:59 [DEBUG] train episode 1689: reward = 126.00, steps = 126\n",
      "14:15:07 [DEBUG] train episode 1690: reward = 148.00, steps = 148\n",
      "14:15:17 [DEBUG] train episode 1691: reward = 170.00, steps = 170\n",
      "14:15:24 [DEBUG] train episode 1692: reward = 121.00, steps = 121\n",
      "14:15:36 [DEBUG] train episode 1693: reward = 200.00, steps = 200\n",
      "14:15:48 [DEBUG] train episode 1694: reward = 178.00, steps = 178\n",
      "14:15:54 [DEBUG] train episode 1695: reward = 116.00, steps = 116\n",
      "14:16:01 [DEBUG] train episode 1696: reward = 126.00, steps = 126\n",
      "14:16:08 [DEBUG] train episode 1697: reward = 122.00, steps = 122\n",
      "14:16:15 [DEBUG] train episode 1698: reward = 124.00, steps = 124\n",
      "14:16:26 [DEBUG] train episode 1699: reward = 200.00, steps = 200\n",
      "14:16:37 [DEBUG] train episode 1700: reward = 193.00, steps = 193\n",
      "14:16:50 [DEBUG] train episode 1701: reward = 200.00, steps = 200\n",
      "14:17:02 [DEBUG] train episode 1702: reward = 189.00, steps = 189\n",
      "14:17:12 [DEBUG] train episode 1703: reward = 200.00, steps = 200\n",
      "14:17:22 [DEBUG] train episode 1704: reward = 177.00, steps = 177\n",
      "14:17:32 [DEBUG] train episode 1705: reward = 200.00, steps = 200\n",
      "14:17:41 [DEBUG] train episode 1706: reward = 161.00, steps = 161\n",
      "14:17:47 [DEBUG] train episode 1707: reward = 128.00, steps = 128\n",
      "14:17:55 [DEBUG] train episode 1708: reward = 133.00, steps = 133\n",
      "14:18:05 [DEBUG] train episode 1709: reward = 200.00, steps = 200\n",
      "14:18:13 [DEBUG] train episode 1710: reward = 150.00, steps = 150\n",
      "14:18:21 [DEBUG] train episode 1711: reward = 142.00, steps = 142\n",
      "14:18:27 [DEBUG] train episode 1712: reward = 125.00, steps = 125\n",
      "14:18:34 [DEBUG] train episode 1713: reward = 131.00, steps = 131\n",
      "14:18:43 [DEBUG] train episode 1714: reward = 160.00, steps = 160\n",
      "14:18:49 [DEBUG] train episode 1715: reward = 127.00, steps = 127\n",
      "14:18:55 [DEBUG] train episode 1716: reward = 113.00, steps = 113\n",
      "14:19:03 [DEBUG] train episode 1717: reward = 137.00, steps = 137\n",
      "14:19:10 [DEBUG] train episode 1718: reward = 136.00, steps = 136\n",
      "14:19:20 [DEBUG] train episode 1719: reward = 200.00, steps = 200\n",
      "14:19:27 [DEBUG] train episode 1720: reward = 128.00, steps = 128\n",
      "14:19:35 [DEBUG] train episode 1721: reward = 140.00, steps = 140\n",
      "14:19:42 [DEBUG] train episode 1722: reward = 135.00, steps = 135\n",
      "14:19:50 [DEBUG] train episode 1723: reward = 150.00, steps = 150\n",
      "14:20:00 [DEBUG] train episode 1724: reward = 200.00, steps = 200\n",
      "14:20:11 [DEBUG] train episode 1725: reward = 200.00, steps = 200\n",
      "14:20:21 [DEBUG] train episode 1726: reward = 200.00, steps = 200\n",
      "14:20:32 [DEBUG] train episode 1727: reward = 200.00, steps = 200\n",
      "14:20:39 [DEBUG] train episode 1728: reward = 127.00, steps = 127\n",
      "14:20:47 [DEBUG] train episode 1729: reward = 147.00, steps = 147\n",
      "14:20:54 [DEBUG] train episode 1730: reward = 134.00, steps = 134\n",
      "14:21:03 [DEBUG] train episode 1731: reward = 178.00, steps = 178\n",
      "14:21:10 [DEBUG] train episode 1732: reward = 133.00, steps = 133\n",
      "14:21:18 [DEBUG] train episode 1733: reward = 136.00, steps = 136\n",
      "14:21:27 [DEBUG] train episode 1734: reward = 173.00, steps = 173\n",
      "14:21:35 [DEBUG] train episode 1735: reward = 153.00, steps = 153\n",
      "14:21:42 [DEBUG] train episode 1736: reward = 134.00, steps = 134\n",
      "14:21:52 [DEBUG] train episode 1737: reward = 193.00, steps = 193\n",
      "14:21:59 [DEBUG] train episode 1738: reward = 136.00, steps = 136\n",
      "14:22:08 [DEBUG] train episode 1739: reward = 164.00, steps = 164\n",
      "14:22:14 [DEBUG] train episode 1740: reward = 104.00, steps = 104\n",
      "14:22:21 [DEBUG] train episode 1741: reward = 129.00, steps = 129\n",
      "14:22:28 [DEBUG] train episode 1742: reward = 139.00, steps = 139\n",
      "14:22:37 [DEBUG] train episode 1743: reward = 183.00, steps = 183\n",
      "14:22:44 [DEBUG] train episode 1744: reward = 124.00, steps = 124\n",
      "14:22:52 [DEBUG] train episode 1745: reward = 144.00, steps = 144\n",
      "14:22:58 [DEBUG] train episode 1746: reward = 111.00, steps = 111\n",
      "14:23:04 [DEBUG] train episode 1747: reward = 123.00, steps = 123\n",
      "14:23:15 [DEBUG] train episode 1748: reward = 195.00, steps = 195\n",
      "14:23:24 [DEBUG] train episode 1749: reward = 167.00, steps = 167\n",
      "14:23:30 [DEBUG] train episode 1750: reward = 113.00, steps = 113\n",
      "14:23:39 [DEBUG] train episode 1751: reward = 155.00, steps = 155\n",
      "14:23:45 [DEBUG] train episode 1752: reward = 110.00, steps = 110\n",
      "14:23:55 [DEBUG] train episode 1753: reward = 189.00, steps = 189\n",
      "14:24:04 [DEBUG] train episode 1754: reward = 156.00, steps = 156\n",
      "14:24:12 [DEBUG] train episode 1755: reward = 135.00, steps = 135\n",
      "14:24:22 [DEBUG] train episode 1756: reward = 188.00, steps = 188\n",
      "14:24:31 [DEBUG] train episode 1757: reward = 162.00, steps = 162\n",
      "14:24:39 [DEBUG] train episode 1758: reward = 157.00, steps = 157\n",
      "14:24:45 [DEBUG] train episode 1759: reward = 112.00, steps = 112\n",
      "14:24:53 [DEBUG] train episode 1760: reward = 158.00, steps = 158\n",
      "14:25:00 [DEBUG] train episode 1761: reward = 131.00, steps = 131\n",
      "14:25:07 [DEBUG] train episode 1762: reward = 136.00, steps = 136\n",
      "14:25:17 [DEBUG] train episode 1763: reward = 182.00, steps = 182\n",
      "14:25:25 [DEBUG] train episode 1764: reward = 158.00, steps = 158\n",
      "14:25:36 [DEBUG] train episode 1765: reward = 200.00, steps = 200\n",
      "14:25:43 [DEBUG] train episode 1766: reward = 134.00, steps = 134\n",
      "14:25:54 [DEBUG] train episode 1767: reward = 200.00, steps = 200\n",
      "14:26:00 [DEBUG] train episode 1768: reward = 120.00, steps = 120\n",
      "14:26:08 [DEBUG] train episode 1769: reward = 155.00, steps = 155\n",
      "14:26:19 [DEBUG] train episode 1770: reward = 200.00, steps = 200\n",
      "14:26:26 [DEBUG] train episode 1771: reward = 144.00, steps = 144\n",
      "14:26:34 [DEBUG] train episode 1772: reward = 127.00, steps = 127\n",
      "14:26:48 [DEBUG] train episode 1773: reward = 200.00, steps = 200\n",
      "14:27:00 [DEBUG] train episode 1774: reward = 200.00, steps = 200\n",
      "14:27:11 [DEBUG] train episode 1775: reward = 200.00, steps = 200\n",
      "14:27:18 [DEBUG] train episode 1776: reward = 116.00, steps = 116\n",
      "14:27:29 [DEBUG] train episode 1777: reward = 200.00, steps = 200\n",
      "14:27:40 [DEBUG] train episode 1778: reward = 187.00, steps = 187\n",
      "14:27:47 [DEBUG] train episode 1779: reward = 118.00, steps = 118\n",
      "14:27:59 [DEBUG] train episode 1780: reward = 200.00, steps = 200\n",
      "14:28:07 [DEBUG] train episode 1781: reward = 130.00, steps = 130\n",
      "14:28:17 [DEBUG] train episode 1782: reward = 161.00, steps = 161\n",
      "14:28:26 [DEBUG] train episode 1783: reward = 156.00, steps = 156\n",
      "14:28:38 [DEBUG] train episode 1784: reward = 200.00, steps = 200\n",
      "14:28:47 [DEBUG] train episode 1785: reward = 134.00, steps = 134\n",
      "14:28:58 [DEBUG] train episode 1786: reward = 200.00, steps = 200\n",
      "14:29:09 [DEBUG] train episode 1787: reward = 167.00, steps = 167\n",
      "14:29:22 [DEBUG] train episode 1788: reward = 200.00, steps = 200\n",
      "14:29:30 [DEBUG] train episode 1789: reward = 149.00, steps = 149\n",
      "14:29:39 [DEBUG] train episode 1790: reward = 141.00, steps = 141\n",
      "14:29:50 [DEBUG] train episode 1791: reward = 164.00, steps = 164\n",
      "14:30:03 [DEBUG] train episode 1792: reward = 200.00, steps = 200\n",
      "14:30:11 [DEBUG] train episode 1793: reward = 140.00, steps = 140\n",
      "14:30:23 [DEBUG] train episode 1794: reward = 200.00, steps = 200\n",
      "14:30:30 [DEBUG] train episode 1795: reward = 125.00, steps = 125\n",
      "14:30:40 [DEBUG] train episode 1796: reward = 194.00, steps = 194\n",
      "14:30:51 [DEBUG] train episode 1797: reward = 200.00, steps = 200\n",
      "14:30:59 [DEBUG] train episode 1798: reward = 149.00, steps = 149\n",
      "14:31:06 [DEBUG] train episode 1799: reward = 134.00, steps = 134\n",
      "14:31:13 [DEBUG] train episode 1800: reward = 131.00, steps = 131\n",
      "14:31:24 [DEBUG] train episode 1801: reward = 198.00, steps = 198\n",
      "14:31:34 [DEBUG] train episode 1802: reward = 148.00, steps = 148\n",
      "14:31:45 [DEBUG] train episode 1803: reward = 199.00, steps = 199\n",
      "14:31:54 [DEBUG] train episode 1804: reward = 142.00, steps = 142\n",
      "14:32:03 [DEBUG] train episode 1805: reward = 170.00, steps = 170\n",
      "14:32:10 [DEBUG] train episode 1806: reward = 127.00, steps = 127\n",
      "14:32:23 [DEBUG] train episode 1807: reward = 200.00, steps = 200\n",
      "14:32:33 [DEBUG] train episode 1808: reward = 160.00, steps = 160\n",
      "14:32:43 [DEBUG] train episode 1809: reward = 170.00, steps = 170\n",
      "14:32:54 [DEBUG] train episode 1810: reward = 200.00, steps = 200\n",
      "14:33:06 [DEBUG] train episode 1811: reward = 200.00, steps = 200\n",
      "14:33:17 [DEBUG] train episode 1812: reward = 182.00, steps = 182\n",
      "14:33:27 [DEBUG] train episode 1813: reward = 176.00, steps = 176\n",
      "14:33:37 [DEBUG] train episode 1814: reward = 179.00, steps = 179\n",
      "14:33:45 [DEBUG] train episode 1815: reward = 144.00, steps = 144\n",
      "14:33:57 [DEBUG] train episode 1816: reward = 200.00, steps = 200\n",
      "14:34:05 [DEBUG] train episode 1817: reward = 142.00, steps = 142\n",
      "14:34:14 [DEBUG] train episode 1818: reward = 161.00, steps = 161\n",
      "14:34:24 [DEBUG] train episode 1819: reward = 176.00, steps = 176\n",
      "14:34:35 [DEBUG] train episode 1820: reward = 192.00, steps = 192\n",
      "14:34:46 [DEBUG] train episode 1821: reward = 193.00, steps = 193\n",
      "14:34:55 [DEBUG] train episode 1822: reward = 159.00, steps = 159\n",
      "14:35:07 [DEBUG] train episode 1823: reward = 200.00, steps = 200\n",
      "14:35:16 [DEBUG] train episode 1824: reward = 158.00, steps = 158\n",
      "14:35:23 [DEBUG] train episode 1825: reward = 128.00, steps = 128\n",
      "14:35:31 [DEBUG] train episode 1826: reward = 135.00, steps = 135\n",
      "14:35:38 [DEBUG] train episode 1827: reward = 141.00, steps = 141\n",
      "14:35:50 [DEBUG] train episode 1828: reward = 200.00, steps = 200\n",
      "14:36:01 [DEBUG] train episode 1829: reward = 200.00, steps = 200\n",
      "14:36:12 [DEBUG] train episode 1830: reward = 193.00, steps = 193\n",
      "14:36:24 [DEBUG] train episode 1831: reward = 200.00, steps = 200\n",
      "14:36:32 [DEBUG] train episode 1832: reward = 150.00, steps = 150\n",
      "14:36:40 [DEBUG] train episode 1833: reward = 148.00, steps = 148\n",
      "14:36:49 [DEBUG] train episode 1834: reward = 156.00, steps = 156\n",
      "14:36:57 [DEBUG] train episode 1835: reward = 137.00, steps = 137\n",
      "14:37:08 [DEBUG] train episode 1836: reward = 186.00, steps = 186\n",
      "14:37:17 [DEBUG] train episode 1837: reward = 165.00, steps = 165\n",
      "14:37:25 [DEBUG] train episode 1838: reward = 141.00, steps = 141\n",
      "14:37:32 [DEBUG] train episode 1839: reward = 126.00, steps = 126\n",
      "14:37:41 [DEBUG] train episode 1840: reward = 165.00, steps = 165\n",
      "14:37:52 [DEBUG] train episode 1841: reward = 200.00, steps = 200\n",
      "14:38:04 [DEBUG] train episode 1842: reward = 200.00, steps = 200\n",
      "14:38:13 [DEBUG] train episode 1843: reward = 180.00, steps = 180\n",
      "14:38:25 [DEBUG] train episode 1844: reward = 198.00, steps = 198\n",
      "14:38:36 [DEBUG] train episode 1845: reward = 188.00, steps = 188\n",
      "14:38:43 [DEBUG] train episode 1846: reward = 125.00, steps = 125\n",
      "14:38:50 [DEBUG] train episode 1847: reward = 119.00, steps = 119\n",
      "14:38:58 [DEBUG] train episode 1848: reward = 151.00, steps = 151\n",
      "14:39:06 [DEBUG] train episode 1849: reward = 131.00, steps = 131\n",
      "14:39:15 [DEBUG] train episode 1850: reward = 174.00, steps = 174\n",
      "14:39:25 [DEBUG] train episode 1851: reward = 165.00, steps = 165\n",
      "14:39:33 [DEBUG] train episode 1852: reward = 158.00, steps = 158\n",
      "14:39:45 [DEBUG] train episode 1853: reward = 200.00, steps = 200\n",
      "14:39:53 [DEBUG] train episode 1854: reward = 151.00, steps = 151\n",
      "14:40:01 [DEBUG] train episode 1855: reward = 144.00, steps = 144\n",
      "14:40:12 [DEBUG] train episode 1856: reward = 200.00, steps = 200\n",
      "14:40:21 [DEBUG] train episode 1857: reward = 161.00, steps = 161\n",
      "14:40:29 [DEBUG] train episode 1858: reward = 141.00, steps = 141\n",
      "14:40:40 [DEBUG] train episode 1859: reward = 200.00, steps = 200\n",
      "14:40:49 [DEBUG] train episode 1860: reward = 150.00, steps = 150\n",
      "14:40:59 [DEBUG] train episode 1861: reward = 169.00, steps = 169\n",
      "14:41:08 [DEBUG] train episode 1862: reward = 144.00, steps = 144\n",
      "14:41:19 [DEBUG] train episode 1863: reward = 200.00, steps = 200\n",
      "14:41:28 [DEBUG] train episode 1864: reward = 168.00, steps = 168\n",
      "14:41:39 [DEBUG] train episode 1865: reward = 200.00, steps = 200\n",
      "14:41:49 [DEBUG] train episode 1866: reward = 200.00, steps = 200\n",
      "14:41:57 [DEBUG] train episode 1867: reward = 145.00, steps = 145\n",
      "14:42:07 [DEBUG] train episode 1868: reward = 191.00, steps = 191\n",
      "14:42:17 [DEBUG] train episode 1869: reward = 177.00, steps = 177\n",
      "14:42:25 [DEBUG] train episode 1870: reward = 149.00, steps = 149\n",
      "14:42:36 [DEBUG] train episode 1871: reward = 200.00, steps = 200\n",
      "14:42:45 [DEBUG] train episode 1872: reward = 164.00, steps = 164\n",
      "14:42:55 [DEBUG] train episode 1873: reward = 160.00, steps = 160\n",
      "14:43:04 [DEBUG] train episode 1874: reward = 165.00, steps = 165\n",
      "14:43:15 [DEBUG] train episode 1875: reward = 200.00, steps = 200\n",
      "14:43:26 [DEBUG] train episode 1876: reward = 200.00, steps = 200\n",
      "14:43:38 [DEBUG] train episode 1877: reward = 200.00, steps = 200\n",
      "14:43:49 [DEBUG] train episode 1878: reward = 196.00, steps = 196\n",
      "14:44:01 [DEBUG] train episode 1879: reward = 195.00, steps = 195\n",
      "14:44:13 [DEBUG] train episode 1880: reward = 200.00, steps = 200\n",
      "14:44:23 [DEBUG] train episode 1881: reward = 177.00, steps = 177\n",
      "14:44:31 [DEBUG] train episode 1882: reward = 144.00, steps = 144\n",
      "14:44:39 [DEBUG] train episode 1883: reward = 146.00, steps = 146\n",
      "14:44:50 [DEBUG] train episode 1884: reward = 179.00, steps = 179\n",
      "14:45:02 [DEBUG] train episode 1885: reward = 176.00, steps = 176\n",
      "14:45:13 [DEBUG] train episode 1886: reward = 198.00, steps = 198\n",
      "14:45:24 [DEBUG] train episode 1887: reward = 200.00, steps = 200\n",
      "14:45:34 [DEBUG] train episode 1888: reward = 171.00, steps = 171\n",
      "14:45:43 [DEBUG] train episode 1889: reward = 164.00, steps = 164\n",
      "14:45:54 [DEBUG] train episode 1890: reward = 179.00, steps = 179\n",
      "14:46:03 [DEBUG] train episode 1891: reward = 156.00, steps = 156\n",
      "14:46:12 [DEBUG] train episode 1892: reward = 150.00, steps = 150\n",
      "14:46:24 [DEBUG] train episode 1893: reward = 197.00, steps = 197\n",
      "14:46:31 [DEBUG] train episode 1894: reward = 139.00, steps = 139\n",
      "14:46:43 [DEBUG] train episode 1895: reward = 200.00, steps = 200\n",
      "14:46:54 [DEBUG] train episode 1896: reward = 196.00, steps = 196\n",
      "14:47:04 [DEBUG] train episode 1897: reward = 200.00, steps = 200\n",
      "14:47:15 [DEBUG] train episode 1898: reward = 200.00, steps = 200\n",
      "14:47:22 [DEBUG] train episode 1899: reward = 126.00, steps = 126\n",
      "14:47:29 [DEBUG] train episode 1900: reward = 134.00, steps = 134\n",
      "14:47:40 [DEBUG] train episode 1901: reward = 192.00, steps = 192\n",
      "14:47:50 [DEBUG] train episode 1902: reward = 168.00, steps = 168\n",
      "14:47:59 [DEBUG] train episode 1903: reward = 160.00, steps = 160\n",
      "14:48:10 [DEBUG] train episode 1904: reward = 193.00, steps = 193\n",
      "14:48:18 [DEBUG] train episode 1905: reward = 160.00, steps = 160\n",
      "14:48:28 [DEBUG] train episode 1906: reward = 186.00, steps = 186\n",
      "14:48:35 [DEBUG] train episode 1907: reward = 126.00, steps = 126\n",
      "14:48:44 [DEBUG] train episode 1908: reward = 168.00, steps = 168\n",
      "14:48:56 [DEBUG] train episode 1909: reward = 200.00, steps = 200\n",
      "14:49:07 [DEBUG] train episode 1910: reward = 177.00, steps = 177\n",
      "14:49:16 [DEBUG] train episode 1911: reward = 160.00, steps = 160\n",
      "14:49:27 [DEBUG] train episode 1912: reward = 200.00, steps = 200\n",
      "14:49:34 [DEBUG] train episode 1913: reward = 129.00, steps = 129\n",
      "14:49:43 [DEBUG] train episode 1914: reward = 161.00, steps = 161\n",
      "14:49:54 [DEBUG] train episode 1915: reward = 200.00, steps = 200\n",
      "14:50:03 [DEBUG] train episode 1916: reward = 144.00, steps = 144\n",
      "14:50:13 [DEBUG] train episode 1917: reward = 171.00, steps = 171\n",
      "14:50:24 [DEBUG] train episode 1918: reward = 169.00, steps = 169\n",
      "14:50:33 [DEBUG] train episode 1919: reward = 154.00, steps = 154\n",
      "14:50:42 [DEBUG] train episode 1920: reward = 155.00, steps = 155\n",
      "14:50:54 [DEBUG] train episode 1921: reward = 200.00, steps = 200\n",
      "14:51:03 [DEBUG] train episode 1922: reward = 150.00, steps = 150\n",
      "14:51:14 [DEBUG] train episode 1923: reward = 189.00, steps = 189\n",
      "14:51:28 [DEBUG] train episode 1924: reward = 200.00, steps = 200\n",
      "14:51:40 [DEBUG] train episode 1925: reward = 179.00, steps = 179\n",
      "14:51:52 [DEBUG] train episode 1926: reward = 159.00, steps = 159\n",
      "14:52:05 [DEBUG] train episode 1927: reward = 170.00, steps = 170\n",
      "14:52:20 [DEBUG] train episode 1928: reward = 200.00, steps = 200\n",
      "14:52:34 [DEBUG] train episode 1929: reward = 185.00, steps = 185\n",
      "14:52:46 [DEBUG] train episode 1930: reward = 164.00, steps = 164\n",
      "14:52:57 [DEBUG] train episode 1931: reward = 200.00, steps = 200\n",
      "14:53:08 [DEBUG] train episode 1932: reward = 186.00, steps = 186\n",
      "14:53:17 [DEBUG] train episode 1933: reward = 166.00, steps = 166\n",
      "14:53:28 [DEBUG] train episode 1934: reward = 192.00, steps = 192\n",
      "14:53:39 [DEBUG] train episode 1935: reward = 200.00, steps = 200\n",
      "14:53:47 [DEBUG] train episode 1936: reward = 142.00, steps = 142\n",
      "14:53:57 [DEBUG] train episode 1937: reward = 177.00, steps = 177\n",
      "14:54:08 [DEBUG] train episode 1938: reward = 191.00, steps = 191\n",
      "14:54:19 [DEBUG] train episode 1939: reward = 200.00, steps = 200\n",
      "14:54:30 [DEBUG] train episode 1940: reward = 200.00, steps = 200\n",
      "14:54:41 [DEBUG] train episode 1941: reward = 200.00, steps = 200\n",
      "14:54:50 [DEBUG] train episode 1942: reward = 174.00, steps = 174\n",
      "14:55:00 [DEBUG] train episode 1943: reward = 175.00, steps = 175\n",
      "14:55:11 [DEBUG] train episode 1944: reward = 200.00, steps = 200\n",
      "14:55:19 [DEBUG] train episode 1945: reward = 160.00, steps = 160\n",
      "14:55:29 [DEBUG] train episode 1946: reward = 167.00, steps = 167\n",
      "14:55:36 [DEBUG] train episode 1947: reward = 134.00, steps = 134\n",
      "14:55:46 [DEBUG] train episode 1948: reward = 174.00, steps = 174\n",
      "14:55:56 [DEBUG] train episode 1949: reward = 200.00, steps = 200\n",
      "14:56:07 [DEBUG] train episode 1950: reward = 200.00, steps = 200\n",
      "14:56:16 [DEBUG] train episode 1951: reward = 159.00, steps = 159\n",
      "14:56:24 [DEBUG] train episode 1952: reward = 153.00, steps = 153\n",
      "14:56:33 [DEBUG] train episode 1953: reward = 178.00, steps = 178\n",
      "14:56:43 [DEBUG] train episode 1954: reward = 162.00, steps = 162\n",
      "14:56:53 [DEBUG] train episode 1955: reward = 171.00, steps = 171\n",
      "14:57:04 [DEBUG] train episode 1956: reward = 200.00, steps = 200\n",
      "14:57:16 [DEBUG] train episode 1957: reward = 200.00, steps = 200\n",
      "14:57:25 [DEBUG] train episode 1958: reward = 160.00, steps = 160\n",
      "14:57:36 [DEBUG] train episode 1959: reward = 200.00, steps = 200\n",
      "14:57:44 [DEBUG] train episode 1960: reward = 145.00, steps = 145\n",
      "14:57:53 [DEBUG] train episode 1961: reward = 162.00, steps = 162\n",
      "14:58:03 [DEBUG] train episode 1962: reward = 200.00, steps = 200\n",
      "14:58:12 [DEBUG] train episode 1963: reward = 155.00, steps = 155\n",
      "14:58:20 [DEBUG] train episode 1964: reward = 159.00, steps = 159\n",
      "14:58:30 [DEBUG] train episode 1965: reward = 174.00, steps = 174\n",
      "14:58:42 [DEBUG] train episode 1966: reward = 200.00, steps = 200\n",
      "14:58:53 [DEBUG] train episode 1967: reward = 199.00, steps = 199\n",
      "14:59:03 [DEBUG] train episode 1968: reward = 200.00, steps = 200\n",
      "14:59:14 [DEBUG] train episode 1969: reward = 200.00, steps = 200\n",
      "14:59:22 [DEBUG] train episode 1970: reward = 143.00, steps = 143\n",
      "14:59:32 [DEBUG] train episode 1971: reward = 200.00, steps = 200\n",
      "14:59:43 [DEBUG] train episode 1972: reward = 200.00, steps = 200\n",
      "14:59:54 [DEBUG] train episode 1973: reward = 200.00, steps = 200\n",
      "15:00:05 [DEBUG] train episode 1974: reward = 200.00, steps = 200\n",
      "15:00:16 [DEBUG] train episode 1975: reward = 200.00, steps = 200\n",
      "15:00:26 [DEBUG] train episode 1976: reward = 156.00, steps = 156\n",
      "15:00:39 [DEBUG] train episode 1977: reward = 200.00, steps = 200\n",
      "15:00:47 [DEBUG] train episode 1978: reward = 141.00, steps = 141\n",
      "15:00:57 [DEBUG] train episode 1979: reward = 192.00, steps = 192\n",
      "15:01:09 [DEBUG] train episode 1980: reward = 199.00, steps = 199\n",
      "15:01:17 [DEBUG] train episode 1981: reward = 152.00, steps = 152\n",
      "15:01:26 [DEBUG] train episode 1982: reward = 166.00, steps = 166\n",
      "15:01:37 [DEBUG] train episode 1983: reward = 200.00, steps = 200\n",
      "15:01:48 [DEBUG] train episode 1984: reward = 200.00, steps = 200\n",
      "15:01:58 [DEBUG] train episode 1985: reward = 200.00, steps = 200\n",
      "15:02:09 [DEBUG] train episode 1986: reward = 198.00, steps = 198\n",
      "15:02:21 [DEBUG] train episode 1987: reward = 200.00, steps = 200\n",
      "15:02:31 [DEBUG] train episode 1988: reward = 169.00, steps = 169\n",
      "15:02:45 [DEBUG] train episode 1989: reward = 200.00, steps = 200\n",
      "15:02:59 [DEBUG] train episode 1990: reward = 200.00, steps = 200\n",
      "15:03:11 [DEBUG] train episode 1991: reward = 200.00, steps = 200\n",
      "15:03:22 [DEBUG] train episode 1992: reward = 194.00, steps = 194\n",
      "15:03:22 [INFO] ==== test ====\n",
      "15:03:34 [DEBUG] test episode 0: reward = 200.00, steps = 200\n",
      "15:03:45 [DEBUG] test episode 1: reward = 200.00, steps = 200\n",
      "15:03:58 [DEBUG] test episode 2: reward = 196.00, steps = 196\n",
      "15:04:08 [DEBUG] test episode 3: reward = 155.00, steps = 155\n",
      "15:04:21 [DEBUG] test episode 4: reward = 172.00, steps = 172\n",
      "15:04:34 [DEBUG] test episode 5: reward = 189.00, steps = 189\n",
      "15:04:46 [DEBUG] test episode 6: reward = 200.00, steps = 200\n",
      "15:04:58 [DEBUG] test episode 7: reward = 200.00, steps = 200\n",
      "15:05:10 [DEBUG] test episode 8: reward = 200.00, steps = 200\n",
      "15:05:22 [DEBUG] test episode 9: reward = 200.00, steps = 200\n",
      "15:05:33 [DEBUG] test episode 10: reward = 200.00, steps = 200\n",
      "15:05:45 [DEBUG] test episode 11: reward = 200.00, steps = 200\n",
      "15:05:56 [DEBUG] test episode 12: reward = 200.00, steps = 200\n",
      "15:06:07 [DEBUG] test episode 13: reward = 200.00, steps = 200\n",
      "15:06:18 [DEBUG] test episode 14: reward = 186.00, steps = 186\n",
      "15:06:29 [DEBUG] test episode 15: reward = 200.00, steps = 200\n",
      "15:06:40 [DEBUG] test episode 16: reward = 200.00, steps = 200\n",
      "15:06:56 [DEBUG] test episode 17: reward = 200.00, steps = 200\n",
      "15:07:09 [DEBUG] test episode 18: reward = 186.00, steps = 186\n",
      "15:07:22 [DEBUG] test episode 19: reward = 174.00, steps = 174\n",
      "15:07:34 [DEBUG] test episode 20: reward = 175.00, steps = 175\n",
      "15:07:48 [DEBUG] test episode 21: reward = 195.00, steps = 195\n",
      "15:08:01 [DEBUG] test episode 22: reward = 200.00, steps = 200\n",
      "15:08:12 [DEBUG] test episode 23: reward = 200.00, steps = 200\n",
      "15:08:24 [DEBUG] test episode 24: reward = 176.00, steps = 176\n",
      "15:08:34 [DEBUG] test episode 25: reward = 200.00, steps = 200\n",
      "15:08:46 [DEBUG] test episode 26: reward = 200.00, steps = 200\n",
      "15:08:57 [DEBUG] test episode 27: reward = 200.00, steps = 200\n",
      "15:09:08 [DEBUG] test episode 28: reward = 200.00, steps = 200\n",
      "15:09:19 [DEBUG] test episode 29: reward = 187.00, steps = 187\n",
      "15:09:31 [DEBUG] test episode 30: reward = 198.00, steps = 198\n",
      "15:09:41 [DEBUG] test episode 31: reward = 200.00, steps = 200\n",
      "15:09:51 [DEBUG] test episode 32: reward = 200.00, steps = 200\n",
      "15:10:02 [DEBUG] test episode 33: reward = 200.00, steps = 200\n",
      "15:10:12 [DEBUG] test episode 34: reward = 196.00, steps = 196\n",
      "15:10:21 [DEBUG] test episode 35: reward = 176.00, steps = 176\n",
      "15:10:32 [DEBUG] test episode 36: reward = 200.00, steps = 200\n",
      "15:10:42 [DEBUG] test episode 37: reward = 200.00, steps = 200\n",
      "15:10:53 [DEBUG] test episode 38: reward = 200.00, steps = 200\n",
      "15:11:03 [DEBUG] test episode 39: reward = 172.00, steps = 172\n",
      "15:11:13 [DEBUG] test episode 40: reward = 200.00, steps = 200\n",
      "15:11:24 [DEBUG] test episode 41: reward = 200.00, steps = 200\n",
      "15:11:34 [DEBUG] test episode 42: reward = 200.00, steps = 200\n",
      "15:11:45 [DEBUG] test episode 43: reward = 200.00, steps = 200\n",
      "15:11:53 [DEBUG] test episode 44: reward = 172.00, steps = 172\n",
      "15:12:04 [DEBUG] test episode 45: reward = 200.00, steps = 200\n",
      "15:12:15 [DEBUG] test episode 46: reward = 200.00, steps = 200\n",
      "15:12:25 [DEBUG] test episode 47: reward = 200.00, steps = 200\n",
      "15:12:34 [DEBUG] test episode 48: reward = 177.00, steps = 177\n",
      "15:12:46 [DEBUG] test episode 49: reward = 200.00, steps = 200\n",
      "15:12:56 [DEBUG] test episode 50: reward = 200.00, steps = 200\n",
      "15:13:07 [DEBUG] test episode 51: reward = 200.00, steps = 200\n",
      "15:13:17 [DEBUG] test episode 52: reward = 200.00, steps = 200\n",
      "15:13:28 [DEBUG] test episode 53: reward = 200.00, steps = 200\n",
      "15:13:38 [DEBUG] test episode 54: reward = 200.00, steps = 200\n",
      "15:13:48 [DEBUG] test episode 55: reward = 200.00, steps = 200\n",
      "15:13:59 [DEBUG] test episode 56: reward = 200.00, steps = 200\n",
      "15:14:09 [DEBUG] test episode 57: reward = 200.00, steps = 200\n",
      "15:14:20 [DEBUG] test episode 58: reward = 200.00, steps = 200\n",
      "15:14:30 [DEBUG] test episode 59: reward = 200.00, steps = 200\n",
      "15:14:41 [DEBUG] test episode 60: reward = 200.00, steps = 200\n",
      "15:14:51 [DEBUG] test episode 61: reward = 200.00, steps = 200\n",
      "15:15:02 [DEBUG] test episode 62: reward = 200.00, steps = 200\n",
      "15:15:12 [DEBUG] test episode 63: reward = 200.00, steps = 200\n",
      "15:15:23 [DEBUG] test episode 64: reward = 200.00, steps = 200\n",
      "15:15:33 [DEBUG] test episode 65: reward = 200.00, steps = 200\n",
      "15:15:44 [DEBUG] test episode 66: reward = 200.00, steps = 200\n",
      "15:15:54 [DEBUG] test episode 67: reward = 200.00, steps = 200\n",
      "15:16:04 [DEBUG] test episode 68: reward = 200.00, steps = 200\n",
      "15:16:15 [DEBUG] test episode 69: reward = 200.00, steps = 200\n",
      "15:16:24 [DEBUG] test episode 70: reward = 181.00, steps = 181\n",
      "15:16:35 [DEBUG] test episode 71: reward = 200.00, steps = 200\n",
      "15:16:45 [DEBUG] test episode 72: reward = 200.00, steps = 200\n",
      "15:16:56 [DEBUG] test episode 73: reward = 200.00, steps = 200\n",
      "15:17:06 [DEBUG] test episode 74: reward = 200.00, steps = 200\n",
      "15:17:15 [DEBUG] test episode 75: reward = 173.00, steps = 173\n",
      "15:17:26 [DEBUG] test episode 76: reward = 200.00, steps = 200\n",
      "15:17:36 [DEBUG] test episode 77: reward = 200.00, steps = 200\n",
      "15:17:46 [DEBUG] test episode 78: reward = 200.00, steps = 200\n",
      "15:17:57 [DEBUG] test episode 79: reward = 200.00, steps = 200\n",
      "15:18:07 [DEBUG] test episode 80: reward = 200.00, steps = 200\n",
      "15:18:17 [DEBUG] test episode 81: reward = 186.00, steps = 186\n",
      "15:18:27 [DEBUG] test episode 82: reward = 200.00, steps = 200\n",
      "15:18:37 [DEBUG] test episode 83: reward = 187.00, steps = 187\n",
      "15:18:48 [DEBUG] test episode 84: reward = 200.00, steps = 200\n",
      "15:18:58 [DEBUG] test episode 85: reward = 200.00, steps = 200\n",
      "15:19:09 [DEBUG] test episode 86: reward = 200.00, steps = 200\n",
      "15:19:19 [DEBUG] test episode 87: reward = 200.00, steps = 200\n",
      "15:19:30 [DEBUG] test episode 88: reward = 200.00, steps = 200\n",
      "15:19:40 [DEBUG] test episode 89: reward = 200.00, steps = 200\n",
      "15:19:51 [DEBUG] test episode 90: reward = 200.00, steps = 200\n",
      "15:20:01 [DEBUG] test episode 91: reward = 200.00, steps = 200\n",
      "15:20:11 [DEBUG] test episode 92: reward = 197.00, steps = 197\n",
      "15:20:22 [DEBUG] test episode 93: reward = 200.00, steps = 200\n",
      "15:20:32 [DEBUG] test episode 94: reward = 200.00, steps = 200\n",
      "15:20:43 [DEBUG] test episode 95: reward = 200.00, steps = 200\n",
      "15:20:53 [DEBUG] test episode 96: reward = 200.00, steps = 200\n",
      "15:21:03 [DEBUG] test episode 97: reward = 189.00, steps = 189\n",
      "15:21:14 [DEBUG] test episode 98: reward = 200.00, steps = 200\n",
      "15:21:24 [DEBUG] test episode 99: reward = 200.00, steps = 200\n",
      "15:21:24 [INFO] average episode reward = 195.95 ± 8.97\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd5gUVfb3v2dyHsIEGBgYwoDkNBKUKKgEFXWVxV3DqivrrglddXF11XV/rKxZX9PqyppzWANmBTGgOCg5CMiAAwMMOQww6b5/dFVPdXdVdVVXVXd19fk8zzzTfavq1ukK3zp17r3nkhACDMMwjLdIirUBDMMwjP2wuDMMw3gQFneGYRgPwuLOMAzjQVjcGYZhPEhKrA0AgIKCAlFWVhZrMxiGYeKKJUuW7BJCFKotc4W4l5WVobKyMtZmMAzDxBVEtFlrGYdlGIZhPAiLO8MwjAdhcWcYhvEgLO4MwzAehMWdYRjGg4QVdyIqJaL5RLSGiFYR0TVSeRsi+oSI1kv/Wyu2uYmINhDROiI61ckfwDAMw4RixHNvBPBnIUQvAMMBXEFEvQHMAvCZEKIcwGfSd0jLpgPoA2AigEeJKNkJ4xmGYRh1wvZzF0LUAKiRPh8kojUAOgCYCmCstNozABYA+ItU/rIQ4hiATUS0AcBQAIvsNj7REULg9SXVOGNgCdJT1J+f7y7bhtE9CpGfmapZz8+1h7D9wFGc0K3AX/bNxl1ol5eBroU5aG4WeP2Hapw1qANSk8P7A1+t34XSNpno3DY7oPzDlduxuuYAOrbKxDlDOiIpifzLjjU24Za3VmJC72LkZqTg09U7MaFXEar3HcE5gzti2/4jmL92J84f3hnLq/cjOYlQkJOOP7+2FPdNG4jivAzsOHAUy6v3Y/xxRXj5+1+wff8RDChthcZmgfU7DuLyMd2QItm/ono/Xvhus395z+JctMpKRXZ6CtbWHMD4XsVhf+eqbfvx3KLN+L8z+6Jqdx12HTqGfXX1+GzNTpwxsASjykPHljQ1C1z98o8YU16IIWWt0a0wx7/s+6o9yM9MRY/iXP+6byypxq+GdER9YzPeX1GDswd3ABEF1Pnusm0YXV6I/Cz1c7xu+0EcONqAuvomPL5gIy4c0Rnt8jOQlpKEwpx0vLu8BuVFOejcNgu/++/3uGVKL9Q3NmNSv/b+OhZt3I3VNQdwap9idGydBQCo2X8Ejy3YiJz0FIw7rgi3vr0K14wvR9Xuw6irb0KXgiycNagjVlTvx74j9Vi8aQ/aZKdhX10DXl9SjTMHlWDGqG74Yn0tBpW2wrLqfcjNSMW3P+/GwaMN6Fmci6ZmgXb5Gag9VI+te49g1qTj8OHK7UhPScIr3/+C0weUYMueOqQkEZqEwJwP1mJK//b409hueHdZDQpz03HxCWUB19rWfUdw2kNfIj8zFXX1Tdh58BjyMlJwz7kD8MOWfdh/pB6DO7XG4k17kJeZiqFd2qAgJx1fb9iFd5dtQ05GCkpaZaK+sRkn9/ZdJ/PX7sQNp/bEja8vx/CubbG4ag8AoCQ/AzsPHsOgTq3w9YbdqD14DDsPHsXA0lZom52Oldv245U/jECHVplhrzezkJl87kRUBmAhgL4AtgghWimW7RVCtCaihwF8K4R4Xip/CsAHQojXg+qaAWAGAHTq1GnI5s2affEZDT5cuR2XP78El4/phlmTjgtZvmnXYYy7ZwFOOq4Ic393vGY9ZbPmAQCq5kxRLfvfj1sx85WluHZCD1wzoTysXWr1KcsB4M6z++G8oZ383+/+aC0emb9Rtb47z+6H91fU4Mv1u/DljeMw6q75AICJfdrhw1Xb/fsafdd8bNlThwenD8Q1Ly8Nqefmyb1w2eiuIbYoSUkiNDaLENv1fufNk3th9vtrQpar1fHsoirc+vYq1XWCj9tz327G3/63Eree1hubdh3Gc99uxvOXDsPI8paHcM3+Ixhx5+cY0bUtXpoxXNdONboUZGPTrsOqy37428lok50WUEd2WjJW3TERADB09qfYefCYZt0AsPGfk9Htr+9rLi9tk4lf9hzRrUPJV38Zh5H/mm94fQD49Lox6F7U8hDVOx6x4OzBHXDftIERbUtES4QQFWrLDDeoElEOgDcAzBRCHNBbVaUs5AkihHhCCFEhhKgoLFQdPcuE4cDRBgDArkPqN9iR+iYAwLZ9xm8eNfbV1QMA9hzWv5HN1dkQ8L1WRyT2HK7Hyq37Afi8WRmlKNXVN2LLnjrVuv31SL9Dj8Zm85PX7D4cvl6/DWbWPeRbd29dPbYfOAoAOHSsMWCdJMmL31B7yHC9Sqp2qws7ADQ0NYeUHZauKQBhhd0IZoQdAG5/Z7XpfTS7fEIivWvfCobEnYhS4RP2F4QQb0rFO4iovbS8PYCdUnk1gFLF5h0BbLPHXCaAKF+zsbpFmpsFjjY0h9igjE7c9OYKQ3Wt2rbflMC6AaW3FBSRQbIUbqgLEv1I6g7GDk3Ue3hEwpY95utzubarPkTtwEhvGQLwFIA1Qoj7FIveAXCR9PkiAG8ryqcTUToRdQFQDmCxfSYzXoR0ZEYAONrYpLkcADYqPFetUCMBmPLQVzj9/30ViYmupq5B//ho4bTujb/3C1vri+TNyu00Njnzm4x47icCuADASUS0VPqbDGAOgJOJaD2Ak6XvEEKsAvAqgNUAPgRwhRAisiuP0UfP7Yr/3flpFsIW70v2ercaCFOt234QzQ4ISaS/Q2s7t3uldtMUwTkR0iPspx0H0eiQl2yFBoceWEZ6y3wF7ft6vMY2swHMtmAX40LsvASDwwvB35Uor/3qvXU2WqHNqQ8sxF8mHoc/ju0Wdl1h4MjsP9Jgi7BE8wFr5HdFm0i93A07D+GU+xcaOp/RpqHRmQeOK1L+MonH/iMNqKtvREOT0O2mCQSGWS54arGiXGN9WyyEvxHXDob84xM0NgtcMz58byN11H+VGwXYSSKJTx840oiDUueDHzbvtdsky0TyNmIEFvd4Jsr3tZ1e42MLNuK1ymrsOnQMVXOmhPHcYyRsNv7gSGLFar8vuI97ohGJEE77d8sQGzc+Cq89uYcj9XJuGQ9jtw5YuTF2Sl35lGh14QzZr8kdey4OrXciHfytbjyOTVaNcuFv6l6UHX6lCGBx9zBuujm3q4i7Uex6a9XrkaO+vvuRD0082GoHVnuWuDOM5czZY3H3APFwY1t50Gh1bTQbczf7JhPrEIjy9zn1oHaTA2AEMyPq1be3yRAbSXLoMmNx9wB2iZkWsRY57Zi7SzBhyIOfrbe0i+Az4aRYPbpgA8pmzUO9Q705YkGlCxtUnbq/WNw1+NVj3+CsR7+OtRmuQM9buublH3H87E/D1xF2De0LXCssY9WLC4cb34ii+Zx9/tstAFrSWCg59/FvomeIAtc80G3EqVPKvWU0WOLCJ7wWsRSht5c6n1nCbG4QvRGqZoh1xxQj8fSoxJBVDPi+Kjb3hxvDKlZx6jpjz50Jix2vjVa8bK1NN9bam7ckGDd67onOkQjTLLgZsw39RmFxZ8LidPgDCDdCNTbumtGHWjSsM9uozMQP7LkzrmBj7SGMv3cB9prMrGhFg4wI2MqtelmoI8MtnjuRIkSjYVQ0G70nPrAwavtKBFjcmYix6t0pheOxBRuxsfYwPlmzw1Yb9K5vs5579V71xGAfrzZnsxsJfoWXj0xTs/Dn97d9n0EnZ+32g47sJ1Hh3jJMwmJ2ENPT31SplpsWJYP3nJWw1ZqaA7ju1aUhw+rv/GANFqzbqbGVOh+viv+HVyLCvWWYEIz2lIh1rw8fVhpUYxRzj0Jg5o/PL0HV7jpcdVJgQrF/f/GzITuicWxmvbHc8X0kMhyWYSLG7Y1u2/cfxQvfbdFc7vZp0p78cpPj+4jlIXh/xfbY7TwBSOKwDBNrhIhcZPQaPP/8Wuhk1kpiNfkOEfD6kmp8L81kb5adB43n0wnngfuXBumAy597jAGcej9kcWeiwm3vrNJc1hAmGVSsBIwAXP/aMpz7+KKw66px/WvhwxlmG9NcEWFj7CVWYRkimktEO4lopaLsFcWUe1VEtFQqLyOiI4pljztjNgOEjwnb/bbnWOw+jHjHLOZu8fceMzHgRu8XuqPNhHEKp9p2jDSoPg3gYQDPygVCiF/Ln4noXgDKKWs2CiEG2mUgo024BlU7NFEIgVe+/yWkvmjqjdtj7gzjRozMobqQiMrUlpHvnXIagJPsNYtxCz9s2YvVNfYPEAogzJMiZjF3i48wIx73pl3GUijwCFXGLFZj7qMA7BBCKPOYdiGiH4noCyIapbUhEc0gokoiqqytrbVohjv5fO0OHDrWGLP92/E6f+hYFHJ5hBGo2KUfiMludYl1+mUmfrAq7ucBeEnxvQZAJyHEIADXAXiRiPLUNhRCPCGEqBBCVBQWFlo0w31s2nUYlzxdiRtfX+b4vsLd71YyBwYLayxmsomVcxpNHV34U2QOjjtnFmLcQMTiTkQpAM4G8IpcJoQ4JoTYLX1eAmAjAGdmf3U5hyWPfdOuOsf3JYQv5/bSX/Y5ULfz4hFuCr5YNahabVkwY/bf311tyAr5c31jMyrDdNHcV1eP+SZHuTLewYrnPgHAWiFEtVxARIVElCx97gqgHMDPGtt7mmi/PV/36lKc+cjX2K0y6bSV2HFz0CQ8TrTsb9mj/wAMtiFamE2OpsUaB9os5nywFuc8vki37tvfWYWL//u97ftm4gMjXSFfArAIQE8iqiaiS6VF0xEYkgGA0QCWE9EyAK8DuFwIEdkIEI8QrXS5y6t9HZbU8l1beXUPnm0+FmGAWMXcP1zVMjJz/Y7IkmXtOnQMkx780rItwYdg7fYDUv3aD6AlW+JnwhnGfoz0ljlPo/x3KmVvAHjDulnxTzTyklhFCIFf9qhnUJRpjlVXFaUNsTcBJ9+/EO9dNRJ9O+Sb2u6wzQ3q8huh/F/v0HBPmsSGR6gmMK8tqcbou+frrtPoAmWNXcw9kG379B+EwRDZ95DXfGNyybFhIiczLdmRelnc45jwOdL1heXHLeEbYINT0frr1mhUcEKI43UQkxD2tL0o65DPqfxfK3XDoWONrnjrYvR59Q8jkJPuTHJeTvkbJxxrbEJ6ijNPeD2Meu7NzQKNzQKpye4PR0VKQ5NAc7NAUlJ0f2N9Y0uLcn2Tr01FFvw73gvtZXO0oQl9b/soKrYx1miXl+FY3ey5O4SdvWU+XrUdPW/5ECu37g8oj0aPnCaDXVVueXsletzygaejBFe8+ANuenOF4fXtOhQPfb4BG3YeAgBc8nQllmzWbyg9Uu+9SaS9ipP3MIu7w9ghdnJf5WXVgWGUaAhpiOcuff141XaUzZrnL35RysfuYW0HALxS+Yvhde28b3ccaOniGq5/Ow9ijR+cfAtkcXcIO28wWcRj0QNHK+YezflI3fTAMBt2ckpoOQ2BN3Aywsfi7jBqvRwe+PSnAK/XKEbuZ7u9+RBxD5fqQGHAw5+v11kzPslMNdfu4YQIhzvF8dANl/GR7OBDmsU9BjzwqTnRa/HcI0NL8I1cV0pxNzKASbnGPR//FH4HBliwzj2J5VKSjd8yTr1xhH2AB53XvAzuN+FWOCyT4MhdAYPFWE2czTgCTsTs47XbohcQGtcJ416cmj8V4K6Qlnh2URU+WLEdL80YHrLMiVfj4DrVdFStzMr1oxRrAoV1R3ve8mHkO/MgTty6T3+zKaCBVckL323Gc4s2B5Tx49a9OBmWYXG3wK1va88LKmPLbEhWt7dQgXJbTi9rfpCWE/eulrADwM1vrQwt5NPmWpIcjJ1wWMYhnOgtY7cbaMRGHuRoDTc0bvIpjA3ZBtIKOBmWYXF3GDtvLK3LIFoC4gahYszD7SCxwUiSuWRuUI0/7DxlZsIhAXlIwnZbNLdvDsu4m8e+2KhaztruXthzT3Skm9PIhRAQI1e5qWsPHsPseas1Byfp1ccAe+saDE9qDRHdB+LPtep28UM5Nhg56jyIKcHxh9xtuBBufmsFnvxyExaur42oPpYJ4IKnvjOUcfHnXYfw6vfVYddzGm43iREGjjuHZeIYO1LgRlqHmng3NDVbqpPxHcOF68MPrNp1qB73f2rPQC5L8KmOCUbemJxMI2Fkmr25RLSTiFYqym4noq1EtFT6m6xYdhMRbSCidUR0qlOGux0nzpkjPXDM2mBy/YamZvznS29NoysE0KiRR92NcFgmMTHiuT8NYKJK+f1CiIHS3/sAQES94ZtbtY+0zaPyhNlM5GjdmrG4Zc3u8/lvN+P/5q1xxJZY4uSxt/util/SYkOsj3tYcRdCLARgdJLrqQBeFkIcE0JsArABwFAL9sU9dpxf+SLZpNFgZjdfKkIOVoVm/5EGq+YwFmFtjw2xPu5WYu5XEtFyKWzTWirrAECZ8LpaKguBiGYQUSURVdbWuicxlH1EHkP5ccteVO+tAwB8snoHjjT4Jl946PMNlvdgJMZ3wVOLI6hZHZ7qzTx2e3zcvhIbYn3cIxX3xwB0AzAQQA2Ae6VyNeVQ/YVCiCeEEBVCiIrCwsIIzfAmZz36DUb+az6WbN6Ly56txCdRzJ0ejNXr06va7uSNa3fNXj0HbmbxX8fHp+cuhNghhGgSQjQDeBItoZdqAKWKVTsC2GbNxMRl274jttRj5iKza58yTR70Gnce1M7twjAAAIqDmLsaRNRe8fUsAHJPmncATCeidCLqAqAcgH3v+PGIhRMcbi5MK9eO1oV3wpzPTW+jh1fDMnvr6h2rO9av84x13JCqI2xWSCJ6CcBYAAVEVA3gNgBjiWggfPpSBeAPACCEWEVErwJYDaARwBVCiLiarXfDzkPoVpgdtf0dPNqAw8ea0C4/dBZ0u7o+WqkmWGbMCo/RkbDxRh1PQs2EIdZXflhxF0Kcp1L8lM76swHMtmJUrFi8aQ+m/XsR/jG1j211hjvBJ9+3ENsPHEXVnCkhy1JMzteppKGpGTsOHDVkg1Eiedh4VNsdfeX26CHzFCd2b4uvN+zWXE4U6gglUXTvBx6hqmDTrkMAgEU/a5804xg7i9slAVbDaFIhtdVmvbFCtdeLWX0+1tjioUYUlvFoiMHZfu4OVs7YwtzfHY8OrTI1l2emJoecxxW3n4pZk45z2LIWWNxVeH/Fdtvq0gtjhAtxGM07oaymUXINPlkd/jcYqf2R+YGZBuul9AVG8ay4e/R3MeG59bTeSE9JxkfXjkblLRNU18lOTwkZGZydnoJ+BtIA2wWLu0PYce+nRJBU6JT7vzC8biQmmn3wNXo1LuMgnC7A3bTKSgUA5KSnoCAnXXO9gaWtQspO7F6Aj68d7ZhtSljcFbihhVtJssE5uJRhmQYp50k4eYiWfLz43ZYo7Sm6OPlGwi8FseWKcd1w7pCOmsvb5YV2flDj1tP6YN7VI0PKexTnAgD6lORFZqBBeA5VhzByf4a7ifnV373wqfEu+ZmpSE9RT4n18ozhGN61raF60lKS0KdEPQzz3lUjUdomK2IbjcDiHgG3vr0SzwbNMK+FmzXAXe8p8YWbzytjDV/WT/W2JaPCHg4jU/BZhcMySgyo3dyvNhkSdjs8O7sERPkG4GD66ITCqw3FjI8GE21Fz1ziztyILO4mmfPhWlPr62lAuMvnhteWmdqXHt/+vBtls+ahlofO24Kj/dz5uRFTBLQ9dzX6Ohw7jxQWdwWGnFqDN54dPR4OHG20XAfgywQ596tNAIBl1fs11yubNc+W/SUCziYOY3WPNQ0mJmNxcjYlK7C4xzFG9UUIERKO4cZaa7Dn7m3ktBnFedpdHd0Oi7tDGLlB401gX/Bot8ZIcHSEqoN1M8ZobPaFZVKTw0ukO/12FvcA7Hi92nu4Hhc89Z0/tu3EK/Yve+rwu/8uRl298bCN2/rwxztONqhyY21sEaIlLJNmQNzD8d+Lj8fj5w+2XI9ZuCukScKJ9Uvfb8GX63cZzgsTCXd9tA4L1tXiaINOZkLWB0fhsIy3kRtUjSTvC3erj+tZZIdJpmHP3Qb088fobGdxv0a98cPHmvDhqsC0AXM+MNfrhwkknibI9hrdi3JwXLtcpKc4I18Cwp82w0hYxq3Er+UOYEQq1e47ZVk070ujIR+1zJM/7zqMX/baO+tSQuFoWMaxqj1BbkYKPpw5Gredbl9q7mAapbBMiqGYuztDnizuDsNOmDexJy20Ouy5R5fbTu8d8F2IlgbVNEVYpkRlQh03w+JuErXbzmhZyDoR3sPy5WZVA9zpb8QH31ftdaxur3vuo8oLLG1v97NvgEr2RrlB1UhYJvgNes7Z/ewxzCJhLSeiuUS0k4hWKsruJqK1RLSciN4iolZSeRkRHSGipdLf404aHwvUvCq1smgKZ6Rtty4de5HwbN59ONYmOMpvh3W2pR4nB3tdM6EcxXnpqOjcOuy6wWmtpw/t5JRZpjDiuT8NYGJQ2ScA+goh+gP4CcBNimUbhRADpb/L7TEzOqiJ3Serd5iqIxav1M3NwNZ9gfFzJ98cGGc55/FFsTbBU3QvyjG1/pR+7TG4U2t899cJyMtM9ZdrdZV26zzBYcVdCLEQwJ6gso+FEHIn628BaCc/dhGrtx3A6m0HTG1z2bOV+OKnWkW/9VCUZWYEM1LPQ77G5K3nraiJqB53XpIMo4/R6zYtJQlVc6agf0fjGRir5kxBWUG2//v4XsVht8lXPAB+Ndg9UmhHzP0SAB8ovnchoh+J6AsiGqW1ERHNIKJKIqqsra21wYzwTH7oS0x+6EvT2100dzGm/Vvbm4q1B3zoWGQ5aDgqw0SLR38bvUE8Y3sWAlBc3wbvzz+M7hpS1qUgG5//eYzudhmpyaiaMwVVc6bg3mkDTFjqLJbEnYhuBtAI4AWpqAZAJyHEIADXAXiRiFRTpgkhnhBCVAghKgoLC62YYRt6MehNuyKLgwaHS9wEx9wZs5zcO7wnGy20nKq7zwkUWCPaXjVnCm6a3Et1mdyo2mBy/uBYE7G4E9FFAE4D8FshBZqFEMeEELulz0sAbATQww5D3YyAQENTM37cslf1Qlq0Ub3bXKw9foYxS6T+gK1+hNCfStLs1MPh7kN5sFS8zQcckbgT0UQAfwFwhhCiTlFeSETJ0ueuAMoB/GyHoW5BaxDTnA/W4qxHv8G6HQdDlu865EwOdbPtB8HwwyXxmGAghqyH3tve+OO0h9knKRS3Z7tc1XVG9zD2Bh/uspVTf8jrWe3k4PfcGz3muRPRSwAWAehJRNVEdCmAhwHkAvgkqMvjaADLiWgZgNcBXC6E2KNasQuxMtJs5VZfnvTdkpAfqdfJ+2ITWrH2n3Yc5IEwjCqzz+praXu9nElPXFjh/7x+9qSAZcmK7boUZPsnh1b2eTd69+ld2mcP7hCa3tpgvVqkxqnnHjZxmBDiPJXipzTWfQPAG1aN8gKLq1qeaWYuCTOZHrXYecDYmwLH3BOPSM95EvkGV+ltn6zwzoMH/yQHxUoyUn0TUF91Ujm+XL8rItuaVcQ2PSXJ32XRrsF+cmZIedRqvMAjVG3ATid5wr1f2FcZw0gU5Ngz6USkb7dJGoFwZbHZ+2j/kQaV+siQ5/7ulSMN7ydVSkFwkk7YyY2wuNuAiLDHutrgh237Q5N8KWloajbUan84CmEhJg6J4EK9dkIPf0oE2eM2vdsg5bYSNpTvtk5tsgD4wjwyyUlkKN12YW7Lw64gJ013XSLCN7NOwoPTB0VibsxgcVcQ7TDFzFeWmt5m7N0L8P6K7brrGH3UxFkIkbHA8WW+YfTpEYhzu/wWIcxMMy4Zyr7tWpea2j13/Sk98Mex3TTr7d/Rlwtm6sASfHDNqICG3CRqebcw2qDauW227nIAKGmVGfGDLVawuNuAEIjIIzKb2gAw1m/+wBFjcfuFP0Vn8BgTe+6bNhDzrh4ZMJrSKMpQTEqScclobzKLoiz05cW5uj1T5CyORIRe7fMCnJSuhdkhnnui+jA8E5MN9LntI79n5IZOKk51vfQCcsNgopGZlow+JcaH4QfTLi9DdV4APcweZvneIQDFeb4HQ1pKEuqDhD49JdCDlt9Uxx9XhAuGd8Yxaf1wI1Q/vW40ctLNP+ziBRZ3mwi+AGPJu8u2xdoE15KSHCoWTHjeuepEVO89Evm1JXS/BkBEuGRkF3QtzMa7y7bhf0v19yk/FE7sXqCa3EsrTNm9SL2/vVfgsIxNyH3Ov9sU2q3/6pd+xP2f/BQ1Wyo3O5drnPE2Wkm2inIzMLhT+PS3SpRvsVlpWvFq9Yau5CTC+F7FKMoLH9qRY+qyrsv/5QbXRIXF3SbCJe968LP1uOzZSs3ld7y72m6TGDUSMCRjhm6Foelxs9NbXvAj7Qo5tEsbPH/pMHxxw1gAxsOX104In71ErkqOtaenJOOJC4bghcuGae4ruN+9F/GcuL+3fBtG/uvzqOdYNrI/vQbUuV9vstMcJkJyMxInUqnsQigTLHlnDizBpL7tdOv5x5l98Zhq1kffPTGoUysQEUaWF+j2TJFzpysnvk4zMAl2c5DnDgCn9GmHolyf1x8s7vdNGxDQFdKreE7cZ72xAtV7j+BwBCM9tZLxG8GtCfsTlXvOVU+9mpeZOOIdjtP7tw8pC74Hzh/eOWAAktotUto6E5P6+ep66LxB+OTa0QCAZKlnTU66sWP+9zP64JYpvUxPw5cpdVFM13gQBMfcz3ZRznUn4StdgZUXNdb22JOaTP65L88Z0hHXv7YsYPmEXsW4ZUovjL1ngWYdGanJOHjUegqIeMVqtOKMASX+zwM65uPGiT0xraJUc33lwyInPQW/HxWYU92IOTMn9EB2eoqmaMue+w2n9oyoK2i84jnPPVY0u6EPJBPALVMC83P/56KKgFl21Hjg1wOdNMn12DmQj4jwp7HdVVMfqN0tRkIwamSnp2DmhB6ak1nL++pWmIPzh9szf2s84Flxj3ZzCWu7+wj2AtV4cHqgmBclQCxWDyuZUSPbX5jltj5s7KsrHvCcuMcq1S177tHj8fP1p2wz2hPikd8MxtSBHewwKa649bTe+I8iPa+ScANQ1Y6slbYqAJg5oRyv/mGEpTr0SNRb03PiLvPct5uxonq/qW2sXKMs7nSzpakAABqTSURBVNFjYt/2uOtX/TFr0nGqy5MNnsgpKg2K0cTJKesGdMzH2YPVH1yXjOyCCb2LVUMjM4O6Hjp1Vd9zTn9M7tcOfTvkY+aEHhjapY1DewLkX5Fgjrt3xf2uD9fh9Ie/MrTu/iMNeODTnyz1eDnawKMeo8m040tx+ZhuqhMvx8vr92kOPlx6tsvFfdP02w8uGN4ZA0tbBZQVhxk0JB/b3u1Vp0Y2THlxLh797RDNOHnL/giXjuyC+yxMPO1PaxAvF4ZNeE7cI5Hn2fNW44FP1+PTNTttt4dxlsn9WgRSfjanRNjl4+HfxFdKV5lHfqPygDPgpxblZeB/V5zo/37dyeEHDF04ogyd22bhvxcf7++y6LRk/u203pa6L153Sg90LczGsK5Ovh24DyPT7M0lop1EtFJR1oaIPiGi9dL/1oplNxHRBiJaR0SnOmW4Ub77OXRy6h1BCZDqpNzn8TZHIuNjbE/f3Jvym5fWxBDhOK1/iesHsI4qL8CgToHetlp4KRIn9erx5WHXKW2ThS9uGBfWw3cTfUry8fmfxyIvI3G6QQLGPPenAUwMKpsF4DMhRDmAz6TvIKLeAKYD6CNt86g8YXas+PUT34aUBYfH5de1yKbcYOxmjMGJkmUe/HWgxx0vQ8sjCRM8d+kwXDmuu6129CnJw5908qcz8UlYcRdCLAQQnA1rKoBnpM/PADhTUf6yEOKYEGITgA0Ahtpkq20o76mFP9Vi7+F6Q9tt2HnQIYsYJQ+ZnfFGOp8Zqb7L+cZTQxtazx3ivlGJcpposxiZacjMc2Pe1aNw40T1xmkmfok05l4shKgBAOm/PBVKBwC/KNarlspCIKIZRFRJRJW1tfZNGmGk04p84Tc0NePCuYvx1YZdhradcN9Ci9YxSpIIIV3yquZMQX5WZK/PqUlJqJozBb8Z1ilk2d0a6QgA4Lyhpf55Mu3gvKGh+w+mas4UtM/PjGwHUXwx0bsnuIOYu7G7QVXtslO9BIQQTwghKoQQFYWF5l7DrSI3NgV3X+RrNXoM7tQKP985xZa65HSyRgYtqXHn2f2xfvZkW2y5Ylw33Hl2P1vq0sKYtkfvCZBgnVDihkhzy+wgovZCiBoiag9A7mZSDUCZSKIjgKjOHGEkbi6vE+x5rNxqrl98JNTs154mb39d6GzuXiXSB+mEXqEz0Kcm+zx2NxCNHEPR7NLXua12TvQzBpbgqw270FUlTTATeyL13N8BcJH0+SIAbyvKpxNROhF1AVAOYLE1E+1nf10DqvfWhZTX7Dc3jVgkjLjzc81lUx8x1i8/kXlSY2SlFmYHxxSq5EExQ3MU1L1rmPw4drHxn5N1e8VMqyjFxn9ORodWEYaXTHKKg4O+vEhYz52IXgIwFkABEVUDuA3AHACvEtGlALYAOBcAhBCriOhVAKsBNAK4QgjR5JDtEXPy/b7Y+dp/BHcCii1Vu0MfOF4lUt/TrNf64u+HoclEcLh1dhrW3DERjy7YgP/3+Qaz5jma+lkW0dI2WVh9x6nofetHmuva4dwb6XUUzZ5Jj50/BA1N3F3ZKGHFXQhxnsai8RrrzwYw24pRTOLgdIQhJTnJdOwxMy3Z9MOndVYq9tY1oNFBcVdOVZeVpv+rjKZgiCeSkwjJSTHtWR1XeG6Eqhm4tT9xaZ8f2SCcN/90gmq5nJNFzXOXM03+74oTMSBouL8Wc1QaZcMN1VcSJ139GQfx3GQdLNjxRSwczMV/HY9MzQmb9VGb7KFj60ykSF0p1Tx3+Te2y8vAS5cNwz6NhvOFN4zD6LvnAwB6qeRuMdJ+UNG5NSo37414lC7jHRLbc+fOj66hT0keFt+sGumznaK8DOSGG4oe9NSR59wMlswPrhmFD64Z5c9n09Tsiwkvuukk1Wqz0lJQotEA2UnRM2VAaSu8f/Uo3DG1DwDfDEc3B00+olpHG18dXgzLMObwnuduZl3WdtdQmJvun9DYDSilsVVWKnLSU1B78FjIerKHLY8alT135QClSK+z3iV56F2Shz4l+RhY2spQ46XceCyv+82sk9iFSVA8J+5m6HObdm8DJnJy01Nw8Jj+PKRyr5doz/xjFGVMXqt740czR/s/F0ldBvW6BUbqTA/pHD5NQW5GCg4ebfTH/OXjq/WWoEcS8ZzAXsB74s4XZcwxImIdWweKjtveoqZVlCIlOQnXv7ZM0zblAJ8xPQrxnwsrMKZn6GjraPy0jq2zsKbmgF/cTbS9hvD1rJOw40DoWwoTXyREzL1s1ryYTb/nJe6Y2ge3nd477HpG+qL/8yypN4g7HXckJREm92sHAOjQOlP1+glO4DWhd7Fqjxb/ZBEG9jvv6pH44JpRpu2V8Yu7hZh7+/zMkEk8mPgjIcSdsYeuBTno1yHflrqy06WXRhc/c7PSUvD4+YPx7KUtiU2VD65w+vncpUMx//qxLQUq67975ciA731K8lV7yoRDrlrOl8S9ZRjvhWU0YMfdHozEYs04jekpPv8iN8Odl+LEvtpT4YVLvTuqPHxCvH4d7XlYymSk+rp45qS783gy0SNhrgDWdnswMhG4GZ9xRLe2uHlyL0yrKA2/ssuItW887+qRqNrlS1khP2cuPrELerXPw4UjymJnGOMKPCfuWn3XOeZuHQFh0HM3LntEhMtGR5aqN9YY/Zny24ndPYP6lOSjT0l+gC1pyUm4wuaZmpj4xHPirgVLuz0YmgzFeTOijlofc6MPsed/PwzvLduGgpw0u81iGE0816DKDrpzjOja1lhYxoPq/p+LjscfRndFmU5+cy26FGTjqvHlUcnDzqOuGRnPee5aWflY9K2TkpxkcHCL99S9S0E2bpocfvh/JDx+/hBs3ac9iYsR3DoYjIkdnhN3LdijsQcjbRde9NydZGLfdrbVxU4MI+O5sIwWfNEb59bTtAcqWY25X39KD1wZ5w1+/75giOtmBfq/M/vi+LLW6NkuN9amMC4hYcSdsca1Ur5yrZj7uUM6+j//65z+mvVceVI5rj+1p73GRZlT+7TDEyan+3OaAaWt8NrlJ/j7uTNMxOJORD2JaKni7wARzSSi24loq6LcnmnlGcc5fUCJavlvhnXCNRPKAWgPYrr73AH+z+N6tkxiPbpHIf44tpt9RjIMY4iIY+5CiHUABgIAESUD2ArgLQAXA7hfCHGPLRbaRCKEZVKSyNI0b3I8XS9mbqS3jJJnL/EN3X9swcaI7WIYxjx2hWXGA9gohNhsU30RseyXfZrLEqFBNSCPiUlGlRf4P+vFzDmhFMPEB3aJ+3QALym+X0lEy4loLhGpJqMmohlEVElElbW1tbYYMfWRrzWXec1zV5vuLdJkUWcOLMFzlw7DOVLcfLBO/vDivAxUzZmiumxAR+2kV7npKZg6UD3swzCM/VgWdyJKA3AGgNekoscAdIMvZFMD4F617YQQTwghKoQQFYWF4RMsJRpf3DBWc1mb7DQsu+2UkPJIeyA+MH0QAGBszyJUzZnin6rNLG9fqZ2udsXfT8WD0n4YhnEeOzz3SQB+EELsAAAhxA4hRJMQohnAkwCG6m4dJeLNcU8i0vSQ5Th2MHb1Lw+ePNprbz0MkwjYIe7nQRGSISJljtSzAKy0YR+WibfEYZGEWIJT0K6fPSmifaenJGNDhNsyDOMOLIk7EWUBOBnAm4riu4hoBREtBzAOwLVW9mEXf/ufK54xhtHTdi0PPbhYbVagFIMPjRTFtlr7O6FbW9069OYTZRjGWSylHxBC1AFoG1R2gSWLHOJ/S7fF2gRTyF74whvGYfTd8wOWpSRpPJMN6HZ6ShIa65tM2aL20rPstlOQmZqMHrd8gKFd2oQsX/n3Uw0/SBiGsZ+EyS0Tb8jeco7KDEVWJj8e07MQ76/YHnkFEnJvne9vnqA6ixLPBMQwsYXTD7gUvSnckrU8dwPNCveeOzDg+12/0k4V8CcDI0sLc9N5yDvDuBAWd5cii7uaxGvNbN9koNFYnhVIpjg/Q3PdDq05Zs4w8QqLuw04MbmzHK5W03Etx91C5gFV4qyDEcMwCljcbaAgJz3gux1D9OWukGqTMMgNqp9eNyZA/NPCBONL22QiKYmw4Pqx/p4s3OTJMN6ExV0HoyM1lQJ7Wv/2qNAZvm8UOSyTnhp6imTPvXtRDkpbt9hYmJuuOcDp4d8MwkczRwMAygqy0aUgO8T2YHjSDYaJX1jcdTCaAVGpganJSbaIohyWCY6RA9oxd8CXYhcA2gfF0sf1LEJWmrnwEYdlGCZ+YXHXQU/chpa18c/Go+zZkppMtkyE7G9QValLWaa2q8fPH4I3/3SCbv3xkiXzinGcC55hIoE7I+ugl7LgvGGlyMtIxcerdwSJexKabGjZ1A2XhNlWbU5O7VGt2rW5ISzTLp977DBMJLDnroOeRCtFUSmCOekppie0CFf/WYM6BCxTPkyM6m+wiBsxUU5foBYaYhjG3fBdq0M4ASR/d8UW4bxmQjmabfbc5TzoPYtzcfPkXsjPCs3lbqY+AEiTBFsvQ8BZgzrgynHd437OU4ZJRDgso4NeXJqoxRtW6mNWWorqVHdDOrfGks17w+7zohGdUZSXEZD0a1R5IWaM7orLRnVFYW5gt8vkCPO33PWr/njqq00Y1lU7+VdqclLMhd0FkSGGiUvYc9chbOhCUp7UoLCFmuNuNOVwr/Z5uGJc94Cy5CTCXyf3ChF2AHjywgp0bpuFO6b20Tc1SCWL8jJw0+ReET8cGIZxN+y56xAuutLU5FshOCatFpYxGqkxm8e9a2EOvrhhnKltGIbxPuy566KvyA1NzQBCxT04LDO8axvceXY/Q3u0O03uyzOG49whHcOOXmUYxlvwHa9DuEhKvV/cA7MiXnlSYFhl7u+OR6/2eehWmB12n3aHSYZ3bYu7zx1gS997hmHiBw7L6BCuS6Oc4+VYY+DkF/LQ/mD00vh2apOFxqZmjC7nycIZhrGOJXEnoioABwE0AWgUQlQQURsArwAoA1AFYJoQInw3ERei28+dCCO7F6AoNx2Xj+mGL9fv0l5XannV88rv//VADLEhJw3DMAxgT1hmnBBioBCiQvo+C8BnQohyAJ9J313Pe1eNDCnTc9yHdWmD/KxULL55AirKjIly25w01fJlt57Cws4wjK04EXOfCuAZ6fMzAM50YB9RQav74ntXjURxXktiLr1wC9DSDVFtwuolt0yIaFBSosBNBQwTGVbFXQD4mIiWENEMqaxYCFEDANL/IrUNiWgGEVUSUWVtba1FM5yhR3Guanmw5qvpz/EGvXmGYRgnsCruJwohBgOYBOAKIhptdEMhxBNCiAohREVhofsaETNSk1QHDamh1hPltcv1szLKxEduRncwQmc0LcMwgVgSdyHENun/TgBvARgKYAcRtQcA6f9Oq0bGgmahHXMP1nKjvRc5P7o14iVNMcO4gYjFnYiyiShX/gzgFAArAbwD4CJptYsAvG3VSLtR88iDhVcI4e8K+dB5g3TXNduHfEyPljcVFnx99FISMwyjjRXPvRjAV0S0DMBiAPOEEB8CmAPgZCJaD+Bk6burUJOLYK+wqbmlJHgEalGesXCNf3/SDuVZkm49vbc/jW9WWrLWZgzDMBETcT93IcTPAAaolO8GMN6KUU5D5JtMWh5hCoR60L6wjK8wuDeMsqeMGS45sQynD2iPotwM3H1Of9wypRey03kcGcMw9pOw6Qcq/zYBP/7tZP93teiInCLGaExdq8HPnxqYCEW5vgdDSnIS2uaYewNgGIYxSkK6jQRCXkZg33K1Pu3NKp671qxEX944DgUs1o7Sv2OrWJvAMHFDYoq7iieu57krg/TfzDpJtc7SNlmm9seYo1+HfNzIM0IxjGESMiyjprXBnjygHnM3E0rp1T5Pc3+MOfqU5CGF0xYzjGE8e7dkpCZhXE/1wVFqXRe7F+Xgxd8PCygTJmPuwbw8YzhemTGcRckC/NbDMJHhWdXJzUg1nYzrhO4F/rBLYW66aszdDPmZqbpzlDLhaZXpe6MyOlqYYRgfno25CyH8nndF59Y4Z0hHzHpzRdjtSlpl4umLj0fPdrm48/21AIDMtGTMv34sdhw46qTJjAoT+7bDfdMG4LT+JbE2hWHiCk+I+9i75+PyMd0CypSdX4Z3bYvuRTn+7+XFOdBjbE9frrPZZ/XFyO4FGFTaCkSkOQkH4xxEhLMHd4y1GQwTd3hC3Kt214V45QItPWCIgIzUlpGgD04PTCegRW5GKqYdX2qTlQzDMNEj7mPuWjnXlWEZgq+BFQC6FWYjP5PzpzMM423iWtzXbT+IyQ99pbrM57m3CH/wJNYMwzBeJq7DMkcbmrCm5oDqMqFM2UuEjq0zcdVJ3f0JuxiGYbxMXIu7XhdFZbiG4GuY+/MpPMKRYZjEIK7FXa/7ubJBVY/nLh2KL9fvssskhmEYVxDX4q47uEjAH5fRW21UeSFGlbtvmj+GYRgrxHWDarJOXoCArpCc3YVhmAQjrsVdL+dLQMydtZ1hmATDyhyqpUQ0n4jWENEqIrpGKr+diLYS0VLpb7J95obYoLlMOcE1azvDMImGlZh7I4A/CyF+kCbKXkJEn0jL7hdC3GPdPH30PPdmIfz93NlzZxgm0bAyh2oNgBrp80EiWgMgqp3IdbtCInReVIZhmETBlpg7EZUBGATgO6noSiJaTkRziUg17y4RzSCiSiKqrK2tjWi/4XrLtIxhYtedYZjEwrK4E1EOgDcAzBRCHADwGIBuAAbC59nfq7adEOIJIUSFEKKisDCyroh6mn3xyDKM6eGrd0Q3zqnOMExiYamfOxGlwifsLwgh3gQAIcQOxfInAbxnyUIdknSC7jdN6gUAqJozxandMw4yoLQVlv2yL9ZmMEzcErG4ky/W8RSANUKI+xTl7aV4PACcBWClNRO1iXT6O8b9vPnHE9DUzI0mDBMpVjz3EwFcAGAFES2Vyv4K4DwiGghfyLsKwB8sWahDMsfSPUtyEukOUmMYRh8rvWW+gnoX8vcjN8cc3FDKMAyjjmdHqDIMwyQycS7u6upenJceZUsYhmHcRVyLe7PGKCXd/u8MwzAJQFyLu1aDG4s7wzCJTlyLe6usNNVy1naGYRKduBZ3ACjJzwgpY8+dYZhEJ+7FXS3qzr1oGIZJdOJe3Ntkh4Zm9NISMAzDJAJxL+5zf3c8fl1RGlCWl5EaI2sYhmHcQdyLe3FeBq48qbv/+xXjuuHx84fE0CKGYZjYYykrpFvo2DoT153cA2cN6oDSNlmxNodhGCbmeELciQhXjy+PtRkMwzCuIe7DMgzDMEwoLO4MwzAehMWdYRjGg7C4MwzDeBAWd4ZhGA/C4s4wDONBWNwZhmE8CIs7wzCMByGhMZtRVI0gqgWw2UIVBQB22WSOnbBd5mC7zONW29guc0RqV2chRKHaAleIu1WIqFIIURFrO4Jhu8zBdpnHrbaxXeZwwi4OyzAMw3gQFneGYRgP4hVxfyLWBmjAdpmD7TKPW21ju8xhu12eiLkzDMMwgXjFc2cYhmEUsLgzDMN4kLgWdyKaSETriGgDEc2K8r5LiWg+Ea0holVEdI1UfjsRbSWipdLfZMU2N0m2riOiUx20rYqIVkj7r5TK2hDRJ0S0XvrfOpp2EVFPxTFZSkQHiGhmrI4XEc0lop1EtFJRZvoYEdEQ6VhvIKKHiMjS7Owadt1NRGuJaDkRvUVEraTyMiI6ojh2j0fZLtPnLkp2vaKwqYqIlkrl0TxeWvoQvWtMCBGXfwCSAWwE0BVAGoBlAHpHcf/tAQyWPucC+AlAbwC3A7heZf3eko3pALpItic7ZFsVgIKgsrsAzJI+zwLwr2jbFXTutgPoHKvjBWA0gMEAVlo5RgAWAxgBgAB8AGCSA3adAiBF+vwvhV1lyvWC6omGXabPXTTsClp+L4BbY3C8tPQhatdYPHvuQwFsEEL8LISoB/AygKnR2rkQokYI8YP0+SCANQA66GwyFcDLQohjQohNADbA9xuixVQAz0ifnwFwZgztGg9goxBCb1Syo3YJIRYC2KOyT8PHiIjaA8gTQiwSvrvwWcU2ttklhPhYCNEoff0WQEe9OqJllw4xPV4ykoc7DcBLenU4ZJeWPkTtGotnce8A4BfF92roi6tjEFEZgEEAvpOKrpReoecqXruiaa8A8DERLSGiGVJZsRCiBvBdeACKYmCXzHQE3nCxPl4yZo9RB+lzNG28BD7vTaYLEf1IRF8Q0SipLJp2mTl30T5eowDsEEKsV5RF/XgF6UPUrrF4Fne1uFPU+3USUQ6ANwDMFEIcAPAYgG4ABgKoge+1EIiuvScKIQYDmATgCiIarbNuVI8jEaUBOAPAa1KRG45XOLRsifaxuxlAI4AXpKIaAJ2EEIMAXAfgRSLKi6JdZs9dtM/peQh0IqJ+vFT0QXNVDRsiti2exb0aQKnie0cA26JpABGlwnfiXhBCvAkAQogdQogmIUQzgCfREkqImr1CiG3S/50A3pJs2CG94smvoTujbZfEJAA/CCF2SDbG/HgpMHuMqhEYInHMRiK6CMBpAH4rvZ5DeoXfLX1eAl+ctke07Irg3EXzeKUAOBvAKwp7o3q81PQBUbzG4lncvwdQTkRdJG9wOoB3orVzKZ73FIA1Qoj7FOXtFaudBUBuxX8HwHQiSieiLgDK4WsosduubCLKlT/D1xi3Utr/RdJqFwF4O5p2KQjwpmJ9vIIwdYyk1+qDRDRcuh4uVGxjG0Q0EcBfAJwhhKhTlBcSUbL0uatk189RtMvUuYuWXRITAKwVQvhDGtE8Xlr6gGheY1ZahGP9B2AyfK3QGwHcHOV9j4Tv9Wg5gKXS32QAzwFYIZW/A6C9YpubJVvXwWJrvI5dXeFrdV8GYJV8XAC0BfAZgPXS/zbRtEvaTxaA3QDyFWUxOV7wPWBqADTA5x1dGskxAlABn6htBPAwpFHfNtu1Ab54rHydPS6t+yvpHC8D8AOA06Nsl+lzFw27pPKnAVwetG40j5eWPkTtGuP0AwzDMB4knsMyDMMwjAYs7gzDMB6ExZ1hGMaDsLgzDMN4EBZ3hmEYD8LizjAM40FY3BmGYTzI/wdiRxsCNG7avgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-10:]) > 195:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
